showard | 34ab099 | 2009-10-05 22:47:57 +0000 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | |
| 3 | import logging, unittest |
| 4 | import common |
showard | f85a0b7 | 2009-10-07 20:48:45 +0000 | [diff] [blame] | 5 | from autotest_lib.client.common_lib import enum, global_config |
showard | 34ab099 | 2009-10-05 22:47:57 +0000 | [diff] [blame] | 6 | from autotest_lib.database import database_connection |
| 7 | from autotest_lib.frontend import setup_django_environment |
showard | b890045 | 2009-10-12 20:31:01 +0000 | [diff] [blame^] | 8 | from autotest_lib.frontend.afe import frontend_test_utils, models |
showard | 34ab099 | 2009-10-05 22:47:57 +0000 | [diff] [blame] | 9 | from autotest_lib.scheduler import drone_manager, email_manager, monitor_db |
| 10 | |
# Regexp rewrites applied to scheduler SQL so the queries can run on SQLite.
_re_translator = database_connection.TranslatingDatabase.make_regexp_translator
_DB_TRANSLATORS = tuple(
        _re_translator(pattern, replacement)
        for pattern, replacement in (
                (r'NOW\(\)', 'time("now")'),
                # older SQLite doesn't support group_concat, so the call is
                # simply unwrapped to its argument until an important query
                # actually depends on it
                (r'GROUP_CONCAT\((.*?)\)', r'\1'),
        ))
| 19 | |
class NullMethodObject(object):
    """Base class that gives each instance a set of do-nothing methods.

    Subclasses list method names in _NULL_METHODS.  On construction every
    listed name becomes an instance attribute bound to one shared callable
    that accepts any arguments and returns None.
    """
    _NULL_METHODS = ()

    def __init__(self):
        def null_method(*args, **kwargs):
            pass

        # one shared no-op is installed under every requested name
        vars(self).update(dict.fromkeys(self._NULL_METHODS, null_method))
| 29 | |
class MockGlobalConfig(object):
    """Dictionary-backed replacement for the global config object.

    Values must be stored with set_config_value() before they are read;
    reading a value that was never stored raises RuntimeError.
    """

    def __init__(self):
        # maps (section, key) tuples to the stored value
        self._config_info = {}


    def set_config_value(self, section, key, value):
        """Store value under the (section, key) pair."""
        self._config_info[(section, key)] = value


    def get_config_value(self, section, key, type=str,
                         default=None, allow_blank=False):
        """Return the value previously stored for (section, key).

        type, default and allow_blank are accepted but not consulted: the
        stored value is returned as-is, and a missing entry raises
        RuntimeError even when a default is supplied.
        """
        lookup = (section, key)
        if lookup in self._config_info:
            return self._config_info[lookup]
        raise RuntimeError('Unset global config value: %s' % (lookup,))
| 45 | |
| 46 | |
# Kinds of process the mock drone manager keeps track of.  The SpecialTask
# names here must match the suffixes used on the SpecialTask results
# directories (e.g. the "verify" in hosts/host1/1-verify).
_PidfileType = enum.Enum(
        'verify',
        'cleanup',
        'repair',
        'job',
        'gather',
        'parse')
| 51 | |
| 52 | |
class MockDroneManager(NullMethodObject):
    """In-memory stand-in for the scheduler's drone manager.

    No processes are started.  execute_command() only records a pidfile,
    execute_actions() attaches an opaque process object to each recorded
    pidfile, and the test reports process completion with finish_process().
    """
    _NULL_METHODS = ('refresh', 'reinitialize_drones',
                     'copy_to_results_repository')

    def __init__(self):
        super(MockDroneManager, self).__init__()
        # maps result_dir to set of tuples (file_path, file_contents)
        self._attached_files = {}
        # maps pidfile IDs to PidfileContents
        self._pidfiles = {}
        # pidfile IDs that haven't been created yet
        self._future_pidfiles = []
        # maps _PidfileType to the most recently created pidfile ID of that type
        self._last_pidfile_id = {}
        # maps (working_directory, pidfile_name) to pidfile IDs
        self._pidfile_index = {}
        # maps process to pidfile IDs
        self._process_index = {}
        # tracks pidfiles of processes that have been killed
        self._killed_pidfiles = set()


    # utility APIs for use by the test

    def finish_process(self, pidfile_type, exit_status=0):
        """Mark the most recently launched process of pidfile_type as exited.

        Raises KeyError if no process of that type has been launched.
        """
        pidfile_id = self._last_pidfile_id[pidfile_type]
        self._set_pidfile_exit_status(pidfile_id, exit_status)


    def _set_pidfile_exit_status(self, pidfile_id, exit_status):
        """Record exit_status (and zero failed tests) in the pidfile contents.

        Raises KeyError if the pidfile is not currently registered.
        """
        assert pidfile_id is not None
        contents = self._pidfiles[pidfile_id]
        contents.exit_status = exit_status
        contents.num_tests_failed = 0


    def was_last_process_killed(self, pidfile_type):
        """Return True if the latest process of pidfile_type was killed."""
        pidfile_id = self._last_pidfile_id[pidfile_type]
        return pidfile_id in self._killed_pidfiles


    def was_process_executed(self, working_directory, pidfile_name):
        """Return True if execute_command() was called for this pidfile.

        The lookup uses the same (working_directory, pidfile_name) key that
        execute_command() records, so it reports that a launch was requested.
        """
        return (working_directory, pidfile_name) in self._pidfile_index


    # DroneManager emulation APIs for use by monitor_db

    def get_orphaned_autoserv_processes(self):
        """The mock never has orphaned processes."""
        return set()


    def total_running_processes(self):
        """Always report zero running processes."""
        return 0


    def max_runnable_processes(self):
        """Report a fixed capacity of 100 processes."""
        return 100


    def execute_actions(self):
        """Give every pending pidfile a process, then clear the pending list."""
        # executing an "execute_command" causes a pidfile to be created
        for pidfile_id in self._future_pidfiles:
            # Process objects are opaque to monitor_db
            process = object()
            self._pidfiles[pidfile_id].process = process
            self._process_index[process] = pidfile_id
        self._future_pidfiles = []


    def attach_file_to_execution(self, result_dir, file_contents,
                                 file_path=None):
        """Remember the attached file and return a placeholder path."""
        self._attached_files.setdefault(result_dir, set()).add((file_path,
                                                                file_contents))
        return 'attach_path'


    def _initialize_pidfile(self, pidfile_id):
        """Create empty PidfileContents for pidfile_id unless already present."""
        if pidfile_id not in self._pidfiles:
            self._pidfiles[pidfile_id] = drone_manager.PidfileContents()


    # maps pidfile names used outside hosts/ directories to their _PidfileType
    _pidfile_type_map = {
        monitor_db._AUTOSERV_PID_FILE: _PidfileType.JOB,
        monitor_db._CRASHINFO_PID_FILE: _PidfileType.GATHER,
        monitor_db._PARSER_PID_FILE: _PidfileType.PARSE,
    }


    def _set_last_pidfile(self, pidfile_id, working_directory, pidfile_name):
        """Record pidfile_id as the newest pidfile of its type.

        For directories under hosts/ the type is parsed from the directory
        suffix; otherwise it is looked up from the pidfile name.
        """
        if working_directory.startswith('hosts/'):
            # such paths look like hosts/host1/1-verify, we'll grab the end
            type_string = working_directory.rsplit('-', 1)[1]
            pidfile_type = _PidfileType.get_value(type_string)
        else:
            pidfile_type = self._pidfile_type_map[pidfile_name]
        self._last_pidfile_id[pidfile_type] = pidfile_id


    def execute_command(self, command, working_directory, pidfile_name,
                        log_file=None, paired_with_pidfile=None):
        """Record a launch request and return a fresh, opaque pidfile ID.

        The command itself is ignored.  The pidfile only receives a process
        on the next execute_actions() call.
        """
        pidfile_id = object() # PidfileIds are opaque to monitor_db
        self._future_pidfiles.append(pidfile_id)
        self._initialize_pidfile(pidfile_id)
        self._pidfile_index[(working_directory, pidfile_name)] = pidfile_id
        self._set_last_pidfile(pidfile_id, working_directory, pidfile_name)
        return pidfile_id


    def get_pidfile_contents(self, pidfile_id, use_second_read=False):
        """Return the stored contents, or fresh empty contents if unknown."""
        return self._pidfiles.get(pidfile_id,
                                  drone_manager.PidfileContents())


    def is_process_running(self, process):
        """Every process is reported as running."""
        return True


    def register_pidfile(self, pidfile_id):
        """Start tracking pidfile_id; a no-op if it is already tracked."""
        self._initialize_pidfile(pidfile_id)


    def unregister_pidfile(self, pidfile_id):
        """Stop tracking pidfile_id and discard its contents."""
        # intentionally handle non-registered pidfiles silently
        self._pidfiles.pop(pidfile_id, None)


    def absolute_path(self, path):
        """Return path prefixed with a fake 'absolute/' root."""
        return 'absolute/' + path


    def write_lines_to_file(self, file_path, lines, paired_with_process=None):
        """Accept and discard the write request."""
        # TODO: record this
        pass


    def get_pidfile_id_from(self, execution_tag, pidfile_name):
        """Return the pidfile ID recorded for this pair, or a new opaque ID."""
        return self._pidfile_index.get((execution_tag, pidfile_name), object())


    def kill_process(self, process):
        """Mark the process's pidfile as killed and give it exit status 271.

        Raises KeyError if process was not created by execute_actions().
        """
        pidfile_id = self._process_index[process]
        self._killed_pidfiles.add(pidfile_id)
        self._set_pidfile_exit_status(pidfile_id, 271)
| 192 | |
| 193 | |
class MockEmailManager(NullMethodObject):
    """Email manager replacement that never sends mail.

    send_queued_emails and send_email are no-ops; notification emails are
    written to the log so they remain visible in test output.
    """
    _NULL_METHODS = ('send_queued_emails', 'send_email')

    def enqueue_notify_email(self, subject, message):
        """Log the notification's subject and body at WARNING level."""
        # logging.warning is the documented spelling; logging.warn is a
        # deprecated alias for it
        logging.warning('enqueue_notify_email: %s', subject)
        logging.warning(message)
| 200 | |
showard | 34ab099 | 2009-10-05 22:47:57 +0000 | [diff] [blame] | 201 | |
class SchedulerFunctionalTest(unittest.TestCase,
                              frontend_test_utils.FrontendTestMixin):
    """Functional tests that drive monitor_db.Dispatcher against mocks.

    setUp() replaces the global config, drone manager, email manager and
    scheduler database with in-process stand-ins.  Each test then creates
    jobs, ticks the dispatcher, and reports process completion through the
    mock drone manager.
    """
    # some number of ticks after which the scheduler is presumed to have
    # stabilized, given no external changes
    _A_LOT_OF_TICKS = 10

    def setUp(self):
        self._frontend_common_setup()
        self._set_stubs()
        self._set_global_config_values()
        self.dispatcher = monitor_db.Dispatcher()

        logging.basicConfig(level=logging.DEBUG)


    def tearDown(self):
        self._frontend_common_teardown()


    def _set_stubs(self):
        """Swap the scheduler's external dependencies for mocks.

        self.god and self._test_db_file are not defined in this class;
        presumably FrontendTestMixin provides them -- TODO confirm.
        """
        self.mock_config = MockGlobalConfig()
        self.god.stub_with(global_config, 'global_config', self.mock_config)

        self.mock_drone_manager = MockDroneManager()
        self.god.stub_with(monitor_db, '_drone_manager',
                           self.mock_drone_manager)

        self.mock_email_manager = MockEmailManager()
        self.god.stub_with(email_manager, 'manager', self.mock_email_manager)

        self._database = (
            database_connection.TranslatingDatabase.get_test_database(
                file_path=self._test_db_file,
                translators=_DB_TRANSLATORS))
        self._database.connect(db_type='django')
        self.god.stub_with(monitor_db, '_db', self._database)


    def _set_global_config_values(self):
        """Store the config values the mock global config is asked for."""
        self.mock_config.set_config_value('SCHEDULER', 'pidfile_timeout_mins',
                                          1)


    def _initialize_test(self):
        """Initialize the dispatcher.

        Recovery tests call this only after preparing database state.
        """
        self.dispatcher.initialize()


    def _run_dispatcher(self):
        """Tick the dispatcher _A_LOT_OF_TICKS times."""
        for _ in xrange(self._A_LOT_OF_TICKS):
            self.dispatcher.tick()


    def test_idle(self):
        self._initialize_test()
        self._run_dispatcher()


    def _assert_process_executed(self, working_directory, pidfile_name):
        """Fail unless a process was launched for the given pidfile.

        NOTE(review): relies on the mock drone manager exposing
        was_process_executed(working_directory, pidfile_name).
        """
        # query the pidfile the caller asked about, so the check matches the
        # failure message below
        process_was_executed = self.mock_drone_manager.was_process_executed(
                working_directory, pidfile_name)
        self.assert_(process_was_executed,
                     '%s/%s not executed' % (working_directory, pidfile_name))


    def test_simple_job(self):
        self._initialize_test()
        _, queue_entry = self._make_job_and_queue_entry()
        self._run_dispatcher() # launches verify
        self.mock_drone_manager.finish_process(_PidfileType.VERIFY)
        self._run_dispatcher() # launches job
        self._finish_job()

        # update from DB
        queue_entry = models.HostQueueEntry.objects.get(id=queue_entry.id)
        self.assertEquals(queue_entry.status,
                          models.HostQueueEntry.Status.COMPLETED)
        self.assertEquals(queue_entry.host.status, models.Host.Status.READY)


    def _finish_job(self):
        """Finish the running job process, then parsing and cleanup."""
        self.mock_drone_manager.finish_process(_PidfileType.JOB)
        self._run_dispatcher() # launches parsing + cleanup
        self._finish_parsing_and_cleanup()


    def _finish_parsing_and_cleanup(self):
        """Finish the cleanup and parse processes and let things settle."""
        self.mock_drone_manager.finish_process(_PidfileType.CLEANUP)
        self.mock_drone_manager.finish_process(_PidfileType.PARSE)
        self._run_dispatcher()


    def test_job_abort_in_verify(self):
        self._initialize_test()
        job = self._create_job(hosts=[1])
        self._run_dispatcher() # launches verify
        job.hostqueueentry_set.update(aborted=True)
        self._run_dispatcher() # kills verify, launches cleanup
        self.assert_(self.mock_drone_manager.was_last_process_killed(
                _PidfileType.VERIFY))
        self.mock_drone_manager.finish_process(_PidfileType.CLEANUP)
        self._run_dispatcher()


    def test_job_abort(self):
        self._initialize_test()
        job = self._create_job(hosts=[1])
        job.run_verify = False
        job.save()

        self._run_dispatcher() # launches job
        job.hostqueueentry_set.update(aborted=True)
        self._run_dispatcher() # kills job, launches gathering
        self.assert_(self.mock_drone_manager.was_last_process_killed(
                _PidfileType.JOB))
        self.mock_drone_manager.finish_process(_PidfileType.GATHER)
        self._run_dispatcher() # launches parsing + cleanup
        self._finish_parsing_and_cleanup()


    def test_no_pidfile_leaking(self):
        # after each scenario runs to completion, every pidfile should have
        # been unregistered from the drone manager
        self._initialize_test()
        self.test_simple_job()
        self.assertEquals(self.mock_drone_manager._pidfiles, {})

        self.test_job_abort_in_verify()
        self.assertEquals(self.mock_drone_manager._pidfiles, {})

        self.test_job_abort()
        self.assertEquals(self.mock_drone_manager._pidfiles, {})


    def _make_job_and_queue_entry(self):
        """Create a job on host 1 and return it with its first queue entry."""
        job = self._create_job(hosts=[1])
        queue_entry = job.hostqueueentry_set.all()[0]
        return job, queue_entry


    def test_recover_running_no_process(self):
        # recovery should re-execute a Running HQE if no process is found
        _, queue_entry = self._make_job_and_queue_entry()
        queue_entry.status = models.HostQueueEntry.Status.RUNNING
        queue_entry.execution_subdir = '1-myuser/host1'
        queue_entry.save()
        queue_entry.host.status = models.Host.Status.RUNNING
        queue_entry.host.save()

        self._initialize_test()
        self._run_dispatcher()
        self._finish_job()


    def test_recover_verifying_hqe_no_special_task(self):
        # recovery should fail on a Verifying HQE with no corresponding
        # Verify or Cleanup SpecialTask
        _, queue_entry = self._make_job_and_queue_entry()
        queue_entry.status = models.HostQueueEntry.Status.VERIFYING
        queue_entry.save()

        # make some dummy SpecialTasks that shouldn't count
        models.SpecialTask.objects.create(host=queue_entry.host,
                                          task=models.SpecialTask.Task.VERIFY)
        models.SpecialTask.objects.create(host=queue_entry.host,
                                          task=models.SpecialTask.Task.CLEANUP,
                                          queue_entry=queue_entry,
                                          is_complete=True)

        self.assertRaises(monitor_db.SchedulerError, self._initialize_test)


    def _test_recover_verifying_hqe_helper(self, task, pidfile_type):
        """Recover a Verifying HQE whose SpecialTask of kind `task` exists.

        pidfile_type is the _PidfileType of the process expected to be
        launched for that task.
        """
        _, queue_entry = self._make_job_and_queue_entry()
        queue_entry.status = models.HostQueueEntry.Status.VERIFYING
        queue_entry.save()

        # the task only needs to exist in the database for recovery to find
        models.SpecialTask.objects.create(
                host=queue_entry.host, task=task, queue_entry=queue_entry)

        self._initialize_test()
        self._run_dispatcher()
        self.mock_drone_manager.finish_process(pidfile_type)
        self._run_dispatcher()
        # don't bother checking the rest of the job execution, as long as the
        # SpecialTask ran


    def test_recover_verifying_hqe_with_cleanup(self):
        # recover an HQE that was in pre-job cleanup
        self._test_recover_verifying_hqe_helper(models.SpecialTask.Task.CLEANUP,
                                                _PidfileType.CLEANUP)


    def test_recover_verifying_hqe_with_verify(self):
        # recover an HQE that was in pre-job verify
        self._test_recover_verifying_hqe_helper(models.SpecialTask.Task.VERIFY,
                                                _PidfileType.VERIFY)
| 397 | |
| 398 | |
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()