showard | 170873e | 2009-01-07 00:22:26 +0000 | [diff] [blame] | 1 | import os, re, shutil, signal, subprocess, errno, time, heapq, traceback |
showard | b18134f | 2009-03-20 20:52:18 +0000 | [diff] [blame] | 2 | import common, logging |
showard | c5afc46 | 2009-01-13 00:09:39 +0000 | [diff] [blame] | 3 | from autotest_lib.client.common_lib import error, global_config |
showard | 170873e | 2009-01-07 00:22:26 +0000 | [diff] [blame] | 4 | from autotest_lib.scheduler import email_manager, drone_utility, drones |
showard | 324bf81 | 2009-01-20 23:23:38 +0000 | [diff] [blame] | 5 | from autotest_lib.scheduler import scheduler_config |
showard | 170873e | 2009-01-07 00:22:26 +0000 | [diff] [blame] | 6 | |
showard | 170873e | 2009-01-07 00:22:26 +0000 | [diff] [blame] | 7 | |
# results on drones will be placed under the drone_installation_directory in a
# directory with this name
_DRONE_RESULTS_DIR_SUFFIX = 'results'

# Sentinel for execute_command() argument lists: any command item that is this
# exact object gets replaced with the absolute working directory path.
WORKING_DIRECTORY = object() # see execute_command()
| 13 | |
showard | 8d3dbca | 2009-09-25 20:29:38 +0000 | [diff] [blame] | 14 | |
class DroneManagerError(Exception):
    """Error raised by DroneManager, e.g. when no valid drones are found."""
| 17 | |
| 18 | |
class CustomEquals(object):
    """
    Base class giving subclasses equality and hashing based on the value
    returned by _id().
    """
    def _id(self):
        """
        Return the value that identifies this object. Subclasses must override.
        """
        raise NotImplementedError


    def __eq__(self, other):
        # only compare ids with objects of our own type (or a subclass of it);
        # otherwise let Python try the reflected comparison
        if isinstance(other, type(self)):
            return self._id() == other._id()
        return NotImplemented


    def __ne__(self, other):
        are_equal = (self == other)
        return not are_equal


    def __hash__(self):
        # keep hashing consistent with __eq__
        identity = self._id()
        return hash(identity)
| 36 | |
| 37 | |
class Process(CustomEquals):
    """
    A process on a drone. Two Process objects are equal when both hostname and
    pid match; ppid does not take part in the comparison.
    """
    def __init__(self, hostname, pid, ppid=None):
        self.hostname, self.pid, self.ppid = hostname, pid, ppid


    def _id(self):
        return (self.hostname, self.pid)


    def __str__(self):
        return '%s/%s' % self._id()


    def __repr__(self):
        # default object repr followed by "<hostname/pid>"
        default_repr = super(Process, self).__repr__()
        return default_repr + '<%s>' % self
| 54 | |
| 55 | |
class PidfileId(CustomEquals):
    """
    Identifies a pidfile by its path; two ids with equal paths are equal.
    """
    def __init__(self, path):
        self.path = path


    def _id(self):
        # the path alone is the identity
        return self.path


    def __str__(self):
        path_text = str(self.path)
        return path_text
| 67 | |
| 68 | |
class PidfileContents(object):
    """
    Parsed contents of a pidfile. Every field defaults to None until it is set
    on the instance.
    """
    # class-level defaults, overridden per instance as fields become known
    process = exit_status = num_tests_failed = None


    def is_invalid(self):
        """
        Always False; see InvalidPidfile for the failing counterpart.
        """
        return False
| 76 | |
| 77 | |
class InvalidPidfile(object):
    """
    Stands in for pidfile contents that could not be parsed; carries the error
    message describing what was wrong.
    """
    def __init__(self, error):
        self.error = error


    def is_invalid(self):
        """
        Always True, mirroring PidfileContents.is_invalid().
        """
        return True


    def __str__(self):
        # the string form is the stored error message itself
        message = self.error
        return message
| 89 | |
| 90 | |
class DroneManager(object):
    """
    This class acts as an interface from the scheduler to drones, whether it be
    only a single "drone" for localhost or multiple remote drones.

    All paths going into and out of this class are relative to the full results
    directory, except for those returned by absolute_path().
    """
    def __init__(self):
        # base results directory on the results repository; set by initialize()
        self._results_dir = None
        # execution tag -> Process, for root autoserv processes
        self._processes = {}
        # every Process recorded during the last refresh()
        self._process_set = set()
        # PidfileId -> parsed contents, from the first and second reads
        self._pidfiles = {}
        self._pidfiles_second_read = {}
        # PidfileId -> number of refreshes since it was last registered
        self._pidfile_age = {}
        self._temporary_path_counter = 0
        # hostname -> drone
        self._drones = {}
        self._results_drone = None
        # results dir -> {file path: contents}, consumed by execute_command()
        self._attached_files = {}
        # heap of (used capacity, drone) tuples for enabled drones
        self._drone_queue = []


    def initialize(self, base_results_dir, drone_hostnames,
                   results_repository_hostname):
        """
        Add and initialize the given drones and set up the results repository
        drone.

        @param base_results_dir: base results directory on the results
                repository.
        @param drone_hostnames: hostnames of the drones to add.
        @param results_repository_hostname: hostname of the results repository.

        @raises DroneManagerError: if no drones are registered afterwards.
        """
        self._results_dir = base_results_dir

        for hostname in drone_hostnames:
            drone = self._add_drone(hostname)
            drone.call('initialize', self.absolute_path(''))

        if not self._drones:
            # all drones failed to initialize
            raise DroneManagerError('No valid drones found')

        self.refresh_drone_configs()

        logging.info('Using results repository on %s',
                     results_repository_hostname)
        self._results_drone = drones.get_drone(results_repository_hostname)
        config = global_config.global_config
        results_installation_dir = config.get_config_value(
                scheduler_config.CONFIG_SECTION,
                'results_host_installation_directory', default=None)
        if results_installation_dir:
            self._results_drone.set_autotest_install_dir(
                    results_installation_dir)
        # don't initialize() the results drone - we don't want to clear out any
        # directories and we don't need to kill any processes


    def reinitialize_drones(self):
        """
        Call 'initialize' again on every drone.
        """
        # pass the same drone-side results directory that initialize() uses,
        # rather than the results repository's base directory
        self._call_all_drones('initialize', self.absolute_path(''))


    def shutdown(self):
        """
        Shut down every drone.
        """
        for drone in self.get_drones():
            drone.shutdown()


    def _get_max_pidfile_refreshes(self):
        """
        Normally refresh() is called on every monitor_db.Dispatcher.tick().

        @returns: The number of refresh() calls before we forget a pidfile.
        """
        pidfile_timeout = global_config.global_config.get_config_value(
                scheduler_config.CONFIG_SECTION, 'max_pidfile_refreshes',
                type=int, default=2000)
        return pidfile_timeout


    def _add_drone(self, hostname):
        """
        Create a drone for the hostname, register it and return it.
        """
        logging.info('Adding drone %s', hostname)
        drone = drones.get_drone(hostname)
        self._drones[drone.hostname] = drone
        return drone


    def _remove_drone(self, hostname):
        """
        Forget the drone with the given hostname; unknown hostnames are ignored.
        """
        self._drones.pop(hostname, None)


    def refresh_drone_configs(self):
        """
        Reread global config options for all drones.
        """
        config = global_config.global_config
        section = scheduler_config.CONFIG_SECTION
        config.parse_config_file()
        for hostname, drone in self._drones.items():
            # any non-empty value for <hostname>_disabled disables the drone
            disabled = config.get_config_value(
                    section, '%s_disabled' % hostname, default='')
            drone.enabled = not disabled

            drone.max_processes = config.get_config_value(
                    section, '%s_max_processes' % hostname, type=int,
                    default=scheduler_config.config.max_processes_per_drone)


    def get_drones(self):
        """
        Return an iterator over all registered drones.
        """
        return iter(self._drones.values())


    def _get_drone_for_process(self, process):
        """
        Return the drone whose hostname matches the process's hostname.
        """
        return self._drones[process.hostname]


    def _get_drone_for_pidfile_id(self, pidfile_id):
        """
        Return the drone running the process recorded in the given pidfile.
        The pidfile must already contain a process.
        """
        pidfile_contents = self.get_pidfile_contents(pidfile_id)
        assert pidfile_contents.process is not None
        return self._get_drone_for_process(pidfile_contents.process)


    def _drop_old_pidfiles(self):
        """
        Age every registered pidfile by one refresh and unregister those whose
        age already exceeds the configured maximum.
        """
        # the limit does not change within one pass, so look it up once
        max_age = self._get_max_pidfile_refreshes()
        # iterate over a snapshot since unregister_pidfile() deletes entries
        for pidfile_id, age in list(self._pidfile_age.items()):
            if age > max_age:
                logging.warning('dropping leaked pidfile %s', pidfile_id)
                self.unregister_pidfile(pidfile_id)
            else:
                self._pidfile_age[pidfile_id] += 1


    def _reset(self):
        """
        Clear all per-refresh state: processes, pidfile contents and the drone
        queue.
        """
        self._processes = {}
        self._process_set = set()
        self._pidfiles = {}
        self._pidfiles_second_read = {}
        self._drone_queue = []


    def _call_all_drones(self, method, *args, **kwargs):
        """
        Call the named method on every drone.

        @returns: dict mapping each drone to the value its call() returned.
        """
        all_results = {}
        for drone in self.get_drones():
            all_results[drone] = drone.call(method, *args, **kwargs)
        return all_results


    def _parse_pidfile(self, drone, raw_contents):
        """
        Parse the raw text of a pidfile read from the given drone.

        @returns: a PidfileContents (left empty when raw_contents is empty), or
                an InvalidPidfile when there are more than three lines or a
                line is not an integer.
        """
        contents = PidfileContents()
        if not raw_contents:
            return contents
        lines = raw_contents.splitlines()
        if len(lines) > 3:
            return InvalidPidfile('Corrupt pid file (%d lines):\n%s' %
                                  (len(lines), lines))
        try:
            pid = int(lines[0])
            contents.process = Process(drone.hostname, pid)
            # if len(lines) == 2, assume we caught Autoserv between writing
            # exit_status and num_failed_tests, so just ignore it and wait for
            # the next cycle
            if len(lines) == 3:
                contents.exit_status = int(lines[1])
                contents.num_tests_failed = int(lines[2])
        except ValueError as exc:
            return InvalidPidfile('Corrupt pid file: ' + str(exc.args))

        return contents


    def _process_pidfiles(self, drone, pidfiles, store_in_dict):
        """
        Parse each (path, raw contents) entry of pidfiles and store the result
        in store_in_dict under the corresponding PidfileId.
        """
        for pidfile_path, raw_contents in pidfiles.items():
            pidfile_id = PidfileId(pidfile_path)
            store_in_dict[pidfile_id] = self._parse_pidfile(drone,
                                                            raw_contents)


    def _add_process(self, drone, process_info):
        """
        Record a Process built from process_info's 'pid' and 'ppid' entries and
        return it.
        """
        process = Process(drone.hostname, int(process_info['pid']),
                          int(process_info['ppid']))
        self._process_set.add(process)
        return process


    def _add_autoserv_process(self, drone, process_info):
        """
        Record a root autoserv process and index it by its execution tag.
        Non-root autoserv processes are skipped.
        """
        assert process_info['comm'] == 'autoserv'
        # only root autoserv processes have pgid == pid
        if process_info['pgid'] != process_info['pid']:
            return
        process = self._add_process(drone, process_info)
        execution_tag = self._execution_tag_for_process(drone, process_info)
        self._processes[execution_tag] = process


    def _enqueue_drone(self, drone):
        """
        Push the drone onto the drone queue, keyed by its used capacity.
        """
        heapq.heappush(self._drone_queue, (drone.used_capacity(), drone))


    def refresh(self):
        """
        Called at the beginning of a scheduler cycle to refresh all process
        information.
        """
        self._reset()
        self._drop_old_pidfiles()
        pidfile_paths = [pidfile_id.path for pidfile_id in self._pidfile_age]
        all_results = self._call_all_drones('refresh', pidfile_paths)

        for drone, results_list in all_results.items():
            results = results_list[0]
            drone.active_processes = len(results['autoserv_processes'])
            # only enabled drones are candidates for new executions
            if drone.enabled:
                self._enqueue_drone(drone)

            for process_info in results['autoserv_processes']:
                self._add_autoserv_process(drone, process_info)
            for process_info in results['parse_processes']:
                self._add_process(drone, process_info)

            self._process_pidfiles(drone, results['pidfiles'], self._pidfiles)
            self._process_pidfiles(drone, results['pidfiles_second_read'],
                                   self._pidfiles_second_read)


    def _execution_tag_for_process(self, drone, process_info):
        """
        Return the execution tag found in the process's arguments, or a made-up
        unique one when the arguments carry none.
        """
        execution_tag = self._extract_execution_tag(process_info['args'])
        if not execution_tag:
            # this process has no execution tag - just make up something unique
            return '%s.%s' % (drone, process_info['pid'])
        return execution_tag


    def _extract_execution_tag(self, command):
        """
        Return the argument following the last " -P " in the command string, or
        None if there is none.
        """
        match = re.match(r'.* -P (\S+)', command)
        if not match:
            return None
        return match.group(1)


    def execute_actions(self):
        """
        Called at the end of a scheduler cycle to execute all queued actions
        on drones.
        """
        for drone in list(self._drones.values()):
            drone.execute_queued_calls()

        try:
            self._results_drone.execute_queued_calls()
        except error.AutoservError:
            # report the failure by email and drop the queued calls
            warning = ('Results repository failed to execute calls:\n' +
                       traceback.format_exc())
            email_manager.manager.enqueue_notify_email(
                    'Results repository error', warning)
            self._results_drone.clear_call_queue()


    def get_orphaned_autoserv_processes(self):
        """
        Returns a set of Process objects for orphaned processes only, i.e.
        those recorded during the last refresh whose ppid is 1.
        """
        return set(process for process in self._process_set
                   if process.ppid == 1)


    def get_process_for(self, execution_tag):
        """
        Return the process object for the given execution tag, or None if no
        such process is known.
        """
        return self._processes.get(execution_tag)


    def kill_process(self, process):
        """
        Kill the given process.
        """
        logging.info('killing %s', process)
        drone = self._get_drone_for_process(process)
        drone.queue_call('kill_process', process)


    def _ensure_directory_exists(self, path):
        """
        Create the directory (and any missing parents) unless the path exists.
        """
        if not os.path.exists(path):
            os.makedirs(path)


    def _extract_num_processes(self, command):
        """
        Return the number of comma-separated machines following '-m' in the
        command list, or 1 when the command has no '-m'.
        """
        try:
            machine_list_index = command.index('-m') + 1
        except ValueError:
            return 1
        assert machine_list_index < len(command)
        machine_list = command[machine_list_index].split(',')
        return len(machine_list)


    def total_running_processes(self):
        """
        Return the sum of active processes across all drones.
        """
        return sum(drone.active_processes for drone in self.get_drones())


    def max_runnable_processes(self):
        """
        Return the maximum number of processes that can be run (in a single
        execution) given the current load on drones.
        """
        if not self._drone_queue:
            # all drones disabled
            return 0
        return max(drone.max_processes - drone.active_processes
                   for _, drone in self._drone_queue)


    def _least_loaded_drone(self, drones):
        """
        Return the drone with the lowest used capacity; the earliest one in
        the list wins ties. The list must not be empty.
        """
        return min(drones, key=lambda drone: drone.used_capacity())


    def _choose_drone_for_execution(self, num_processes):
        """
        Pick a drone for num_processes new processes and count them against it.

        If no queued drone has room, the least loaded one is used anyway and an
        error is logged.

        @raises DroneManagerError: if no drones are queued at all.
        """
        # cycle through drones in order of increasing used capacity until
        # we find one that can handle these processes
        checked_drones = []
        drone_to_use = None
        while self._drone_queue:
            _, drone = heapq.heappop(self._drone_queue)
            checked_drones.append(drone)
            if drone.active_processes + num_processes <= drone.max_processes:
                drone_to_use = drone
                break

        if drone_to_use is None:
            drone_summary = ','.join('%s %s/%s' % (drone.hostname,
                                                   drone.active_processes,
                                                   drone.max_processes)
                                     for drone in checked_drones)
            logging.error('No drone has capacity to handle %d processes (%s)',
                          num_processes, drone_summary)
            if not checked_drones:
                # the queue was empty (e.g. all drones disabled), so there is
                # nothing to fall back on; fail with a meaningful error
                raise DroneManagerError(
                        'No enabled drones available to handle %d processes'
                        % num_processes)
            drone_to_use = self._least_loaded_drone(checked_drones)

        drone_to_use.active_processes += num_processes

        # refill _drone_queue
        for drone in checked_drones:
            self._enqueue_drone(drone)

        return drone_to_use


    def _substitute_working_directory_into_command(self, command,
                                                   working_directory):
        """
        Replace, in place, every WORKING_DIRECTORY item of the command list
        with the given working directory.
        """
        for i, item in enumerate(command):
            if item is WORKING_DIRECTORY:
                command[i] = working_directory


    def execute_command(self, command, working_directory, pidfile_name,
                        log_file=None, paired_with_pidfile=None):
        """
        Execute the given command, taken as an argv list.

        @param command: command to execute as a list.  if any item is
                WORKING_DIRECTORY, the absolute path to the working directory
                will be substituted for it (the list is modified in place).
        @param working_directory: directory in which the pidfile will be written
        @param pidfile_name: name of the pidfile this process will write
        @param log_file (optional): path (in the results repository) to hold
                command output.
        @param paired_with_pidfile (optional): a PidfileId for an
                already-executed process; the new process will execute on the
                same drone as the previous process.

        @returns: the PidfileId registered for the new process.
        """
        abs_working_directory = self.absolute_path(working_directory)
        if not log_file:
            log_file = self.get_temporary_path('execute')
        log_file = self.absolute_path(log_file)

        self._substitute_working_directory_into_command(command,
                                                        abs_working_directory)

        if paired_with_pidfile:
            drone = self._get_drone_for_pidfile_id(paired_with_pidfile)
        else:
            num_processes = self._extract_num_processes(command)
            drone = self._choose_drone_for_execution(num_processes)
        logging.info('command = %s', command)
        logging.info('log file = %s:%s', drone.hostname, log_file)
        # attached files are keyed by the relative working directory
        self._write_attached_files(working_directory, drone)
        drone.queue_call('execute_command', command, abs_working_directory,
                         log_file, pidfile_name)

        pidfile_path = os.path.join(abs_working_directory, pidfile_name)
        pidfile_id = PidfileId(pidfile_path)
        self.register_pidfile(pidfile_id)
        return pidfile_id


    def get_pidfile_id_from(self, execution_tag, pidfile_name):
        """
        Return the PidfileId for the named pidfile under the given execution
        tag's directory.
        """
        path = os.path.join(self.absolute_path(execution_tag), pidfile_name)
        return PidfileId(path)


    def register_pidfile(self, pidfile_id):
        """
        Indicate that the DroneManager should look for the given pidfile when
        refreshing.  Registering again resets the pidfile's age.
        """
        if pidfile_id not in self._pidfile_age:
            logging.info('monitoring pidfile %s', pidfile_id)
        self._pidfile_age[pidfile_id] = 0


    def unregister_pidfile(self, pidfile_id):
        """
        Stop looking for the given pidfile when refreshing.  Unknown pidfiles
        are ignored.
        """
        if pidfile_id in self._pidfile_age:
            logging.info('forgetting pidfile %s', pidfile_id)
            del self._pidfile_age[pidfile_id]


    def get_pidfile_contents(self, pidfile_id, use_second_read=False):
        """
        Retrieve a PidfileContents object for the given pidfile_id.  If
        use_second_read is True, use results that were read after the processes
        were checked, instead of before.
        """
        # asking for a pidfile also (re)registers it, resetting its age
        self.register_pidfile(pidfile_id)
        if use_second_read:
            pidfile_map = self._pidfiles_second_read
        else:
            pidfile_map = self._pidfiles
        return pidfile_map.get(pidfile_id, PidfileContents())


    def is_process_running(self, process):
        """
        Check if the given process is in the running process list.
        """
        return process in self._process_set


    def get_temporary_path(self, base_name):
        """
        Get a new temporary path guaranteed to be unique across all drones
        for this scheduler execution.
        """
        self._temporary_path_counter += 1
        return os.path.join(drone_utility._TEMPORARY_DIRECTORY,
                            '%s.%s' % (base_name, self._temporary_path_counter))


    def absolute_path(self, path, on_results_repository=False):
        """
        Return the absolute form of a results-relative path.

        @param path: path relative to the results directory.
        @param on_results_repository: if True, resolve against the results
                repository's base directory; otherwise against the results
                directory under the drone installation directory.
        """
        if on_results_repository:
            base_dir = self._results_dir
        else:
            base_dir = os.path.join(drones.AUTOTEST_INSTALL_DIR,
                                    _DRONE_RESULTS_DIR_SUFFIX)
        return os.path.join(base_dir, path)


    def _copy_results_helper(self, process, source_path, destination_path,
                             to_results_repository=False):
        """
        Copy source_path from the process's drone to destination_path, either
        on the results repository or on the same drone.
        """
        full_source = self.absolute_path(source_path)
        full_destination = self.absolute_path(
                destination_path, on_results_repository=to_results_repository)
        source_drone = self._get_drone_for_process(process)
        if to_results_repository:
            source_drone.send_file_to(self._results_drone, full_source,
                                      full_destination, can_fail=True)
        else:
            source_drone.queue_call('copy_file_or_directory', full_source,
                                    full_destination)


    def copy_to_results_repository(self, process, source_path,
                                   destination_path=None):
        """
        Copy results from the given process at source_path to destination_path
        in the results repository.  destination_path defaults to source_path.
        """
        if destination_path is None:
            destination_path = source_path
        self._copy_results_helper(process, source_path, destination_path,
                                  to_results_repository=True)


    def copy_results_on_drone(self, process, source_path, destination_path):
        """
        Copy a results directory from one place to another on the drone.
        """
        self._copy_results_helper(process, source_path, destination_path)


    def _write_attached_files(self, results_dir, drone):
        """
        Queue writes on the drone for every file attached to results_dir, and
        forget those attachments.
        """
        attached_files = self._attached_files.pop(results_dir, {})
        for file_path, contents in attached_files.items():
            drone.queue_call('write_to_file', self.absolute_path(file_path),
                             contents)


    def attach_file_to_execution(self, results_dir, file_contents,
                                 file_path=None):
        """
        When the process for the results directory is executed, the given file
        contents will be placed in a file on the drone.  Returns the path at
        which the file will be placed.  If file_path is not given, a new
        temporary path is used.
        """
        if not file_path:
            file_path = self.get_temporary_path('attach')
        files_for_execution = self._attached_files.setdefault(results_dir, {})
        assert file_path not in files_for_execution
        files_for_execution[file_path] = file_contents
        return file_path


    def write_lines_to_file(self, file_path, lines, paired_with_process=None):
        """
        Write the given lines (as a list of strings) to a file.  If
        paired_with_process is given, the file will be written on the drone
        running the given Process.  Otherwise, the file will be written to the
        results repository.
        """
        file_contents = '\n'.join(lines) + '\n'
        if paired_with_process:
            drone = self._get_drone_for_process(paired_with_process)
            on_results_repository = False
        else:
            drone = self._results_drone
            on_results_repository = True
        full_path = self.absolute_path(
                file_path, on_results_repository=on_results_repository)
        drone.queue_call('write_to_file', full_path, file_contents)