# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Library providing an API to lucifer."""
6
import logging
import os
import pipes
import socket
import subprocess
import traceback

import common
from autotest_lib.client.bin import local_host
from autotest_lib.client.common_lib import global_config
from autotest_lib.server.hosts import ssh_host
from autotest_lib.frontend.afe import models
Allen Liff7064f2017-09-13 15:11:31 -070018
# Handle on the global config; lucifer settings are read from _SECTION.
_config = global_config.global_config
_SECTION = 'LUCIFER'

# TODO(crbug.com/748234): Move these to shadow_config.ini
# See also drones.AUTOTEST_INSTALL_DIR
_ENV = '/usr/bin/env'
_AUTOTEST_DIR = '/usr/local/autotest'
_JOB_REPORTER_PATH = os.path.join(_AUTOTEST_DIR, 'bin', 'job_reporter')

logger = logging.getLogger(__name__)
Allen Liff7064f2017-09-13 15:11:31 -070029
30
def is_lucifer_enabled():
    """Return True if lucifer is enabled.

    Lucifer is unconditionally enabled: this always returns True and
    does not consult the config.
    """
    return True
Allen Liff7064f2017-09-13 15:11:31 -070034
35
def is_enabled_for(level):
    """Return True if lucifer is enabled for the given level.

    The given level is compared case-insensitively against the
    'lucifer_level' value of the lucifer config section.

    @param level: string, e.g. 'PARSING', 'GATHERING'
    """
    if not is_lucifer_enabled():
        return False
    config_level = (_config.get_config_value(_SECTION, 'lucifer_level')
                    .upper())
    return level.upper() == config_level
46
47
def is_lucifer_owned(job):
    """Return True if job is already sent to lucifer.

    A job counts as sent when it has a 'jobhandoff' attribute.

    @param job: frontend.afe.models.Job instance
    """
    assert isinstance(job, models.Job)
    return hasattr(job, 'jobhandoff')
Allen Liff7064f2017-09-13 15:11:31 -070055
56
def is_lucifer_owned_by_id(job_id):
    """Return True if job is already sent to lucifer.

    @param job_id: id of the job; True is returned when a JobHandoff row
            exists for it.
    """
    return models.JobHandoff.objects.filter(job_id=job_id).exists()
60
61
def is_split_job(hqe_id):
    """Return True if HQE is part of a job with HQEs in a different group.

    For example, if the given HQE has execution_subdir=foo and the job
    has an HQE with execution_subdir=bar, then return True.  The only
    situation where this happens is if provisioning in a multi-DUT job
    fails; the HQEs will each be in their own group.

    See https://bugs.chromium.org/p/chromium/issues/detail?id=811877

    @param hqe_id: HQE id
    """
    hqe = models.HostQueueEntry.objects.get(id=hqe_id)
    hqes = hqe.job.hostqueueentry_set.all()
    try:
        # Raises if the job's HQEs disagree on their execution path.
        _get_consistent_execution_path(hqes)
    except _ExecutionPathError:
        return True
    return False
81
82
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_starting_job_handler(manager, job):
    """Spawn job_reporter to handle a job at the STARTING level.

    Pass all arguments by keyword.

    job_reporter is run through _ENV on a drone picked by the manager.
    Its output goes to the file returned by _prepare_output_file().

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @returns: Drone instance
    """
    manager = _DroneManager(manager)
    drone = manager.pick_drone_to_use()
    results_dir = _results_dir(manager, job)
    args = [
            _JOB_REPORTER_PATH,

            # General configuration
            '--jobdir', _get_jobdir(),
            '--run-job-path', _get_run_job_path(),
            '--watcher-path', _get_watcher_path(),

            # Job specific
            '--lucifer-level', 'STARTING',
            '--job-id', str(job.id),
            '--results-dir', results_dir,

            # STARTING specific
            '--execution-tag', _working_directory(job),
    ]
    gcp_creds = _get_gcp_creds()
    if gcp_creds:
        # The NAME=VALUE item is a plain argument to _ENV.  It must not
        # be shell-quoted here: the local drone execs argv without a
        # shell and the remote drone quotes every argument itself, so
        # quoting here would leave literal quote characters in the
        # value.
        args = ['GOOGLE_APPLICATION_CREDENTIALS=%s' % gcp_creds] + args
    drone.spawn(_ENV, args,
                output_file=_prepare_output_file(drone, results_dir))
    return drone
121
122
# Name of the subdirectory of a job's results dir that holds lucifer logs.
_LUCIFER_DIR = 'lucifer'


def _prepare_output_file(drone, results_dir):
    """Create the lucifer log dir on the drone and return the log path.

    @param drone: Drone instance used to run mkdir
    @param results_dir: results dir of the job; may be on a remote host
    @returns: path of the job_reporter output log inside the log dir
    """
    logdir = os.path.join(results_dir, _LUCIFER_DIR)
    drone.run('mkdir', ['-p', logdir])
    return os.path.join(logdir, 'job_reporter_output.log')
Allen Li3710b6d2018-02-09 18:02:24 -0800130
131
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_gathering_job_handler(manager, job, autoserv_exit, pidfile_id=None):
    """Spawn job_reporter to handle a job at the GATHERING level.

    Pass all arguments by keyword.

    job_reporter is run through _ENV with --need-gather.  Its output
    goes to job_reporter_output.log in the job's results dir.

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @param autoserv_exit: autoserv exit status
    @param pidfile_id: PidfileId instance; if None, a drone is picked
            instead of using the drone of the pidfile.
    @returns: Drone instance
    """
    manager = _DroneManager(manager)
    if pidfile_id is None:
        drone = manager.pick_drone_to_use()
    else:
        drone = manager.get_drone_for_pidfile(pidfile_id)
    results_dir = _results_dir(manager, job)
    # NOTE(review): pidfile_id may still be None here; confirm that the
    # underlying manager accepts None when reading pidfile contents.
    num_tests_failed = manager.get_num_tests_failed(pidfile_id)
    args = [
            _JOB_REPORTER_PATH,

            # General configuration
            '--jobdir', _get_jobdir(),
            '--run-job-path', _get_run_job_path(),
            '--watcher-path', _get_watcher_path(),

            # Job specific
            '--job-id', str(job.id),
            '--lucifer-level', 'GATHERING',
            '--autoserv-exit', str(autoserv_exit),
            '--need-gather',
            '--num-tests-failed', str(num_tests_failed),
            '--results-dir', results_dir,
    ]
    gcp_creds = _get_gcp_creds()
    if gcp_creds:
        # The NAME=VALUE item is a plain argument to _ENV.  It must not
        # be shell-quoted here: the local drone execs argv without a
        # shell and the remote drone quotes every argument itself, so
        # quoting here would leave literal quote characters in the
        # value.
        args = ['GOOGLE_APPLICATION_CREDENTIALS=%s' % gcp_creds] + args
    output_file = os.path.join(results_dir, 'job_reporter_output.log')
    drone.spawn(_ENV, args, output_file=output_file)
    return drone
Allen Lib8b2e592017-12-14 17:41:40 -0800176
177
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_parsing_job_handler(manager, job, autoserv_exit, pidfile_id=None):
    """Spawn job_reporter to handle a job that only needs parsing.

    Pass all arguments by keyword.

    job_reporter is run through _ENV with --lucifer-level GATHERING but
    without --need-gather.  Its output goes to job_reporter_output.log
    in the job's results dir.

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @param autoserv_exit: autoserv exit status
    @param pidfile_id: PidfileId instance; if None, a drone is picked
            instead of using the drone of the pidfile.
    @returns: Drone instance
    """
    manager = _DroneManager(manager)
    if pidfile_id is None:
        drone = manager.pick_drone_to_use()
    else:
        drone = manager.get_drone_for_pidfile(pidfile_id)
    results_dir = _results_dir(manager, job)
    args = [
            _JOB_REPORTER_PATH,

            # General configuration
            '--jobdir', _get_jobdir(),
            '--run-job-path', _get_run_job_path(),
            '--watcher-path', _get_watcher_path(),

            # Job specific
            '--job-id', str(job.id),
            '--lucifer-level', 'GATHERING',
            '--autoserv-exit', str(autoserv_exit),
            '--results-dir', results_dir,
    ]
    gcp_creds = _get_gcp_creds()
    if gcp_creds:
        # The NAME=VALUE item is a plain argument to _ENV.  It must not
        # be shell-quoted here: the local drone execs argv without a
        # shell and the remote drone quotes every argument itself, so
        # quoting here would leave literal quote characters in the
        # value.
        args = ['GOOGLE_APPLICATION_CREDENTIALS=%s' % gcp_creds] + args
    output_file = os.path.join(results_dir, 'job_reporter_output.log')
    drone.spawn(_ENV, args, output_file=output_file)
    return drone
Allen Liff7064f2017-09-13 15:11:31 -0700219
220
def _get_jobdir():
    """Return the 'jobdir' value of the lucifer config section."""
    return _config.get_config_value(_SECTION, 'jobdir')
Allen Li057be2c2017-11-08 13:51:24 -0800223
224
def _get_run_job_path():
    """Return the path of lucifer_run_job inside the binaries dir."""
    return os.path.join(_get_binaries_path(), 'lucifer_run_job')
227
228
def _get_watcher_path():
    """Return the path of lucifer_watcher inside the binaries dir."""
    return os.path.join(_get_binaries_path(), 'lucifer_watcher')
231
232
def _get_binaries_path():
    """Get binaries dir path from config."""
    return _config.get_config_value(_SECTION, 'binaries_path')
236
237
def _get_gcp_creds():
    """Return path to GCP service account credentials.

    This is the empty string by default, in which case no credentials
    will be used.
    """
    return _config.get_config_value(_SECTION, 'gcp_creds', default='')
Allen Li057be2c2017-11-08 13:51:24 -0800244
245
class _DroneManager(object):
    """Simplified drone API.

    Thin wrapper around an old style DroneManager.  Drones are returned
    wrapped by _wrap_drone().
    """

    def __init__(self, old_manager):
        """Initialize instance.

        @param old_manager: old style DroneManager
        """
        self._manager = old_manager

    def get_num_tests_failed(self, pidfile_id):
        """Return the number of tests failed for autoserv by pidfile.

        @param pidfile_id: PidfileId instance.
        @returns: int (-1 if missing)
        """
        state = self._manager.get_pidfile_contents(pidfile_id)
        if state.num_tests_failed is None:
            return -1
        return state.num_tests_failed

    def get_drone_for_pidfile(self, pidfile_id):
        """Return a drone to use from a pidfile.

        @param pidfile_id: PidfileId instance.
        """
        return _wrap_drone(self._manager.get_drone_for_pidfile_id(pidfile_id))

    def pick_drone_to_use(self, num_processes=1, prefer_ssp=False):
        """Return a drone to use.

        Various options can be passed to optimize drone selection.

        @param num_processes: number of processes the drone is intended
            to run
        @param prefer_ssp: indicates whether drones supporting
            server-side packaging should be preferred.  The returned
            drone is not guaranteed to support it.
        """
        old_drone = self._manager.pick_drone_to_use(
                num_processes=num_processes,
                prefer_ssp=prefer_ssp,
        )
        return _wrap_drone(old_drone)

    def absolute_path(self, path):
        """Return absolute path for drone results.

        The returned path might be remote.
        """
        return self._manager.absolute_path(path)
297
298
def _wrap_drone(old_drone):
    """Wrap an old style drone.

    @param old_drone: old style drone; its _host attribute decides which
            Drone implementation is returned.
    @returns: LocalDrone for a LocalHost, RemoteDrone for an SSHHost
    @raises TypeError: if the host is of neither type
    """
    host = old_drone._host
    if isinstance(host, local_host.LocalHost):
        return LocalDrone()
    elif isinstance(host, ssh_host.SSHHost):
        return RemoteDrone(host)
    else:
        raise TypeError('Drone has an unknown host type')
308
309
def _results_dir(manager, job):
    """Return results dir for a job.

    Path may be on a remote host.

    @param manager: object providing absolute_path(), e.g. _DroneManager
    @param job: Job instance
    """
    return manager.absolute_path(_working_directory(job))
316
317
def _working_directory(job):
    """Return the execution path shared by all HQEs of the job.

    @param job: Job instance
    @raises _ExecutionPathError: if the HQEs disagree on the path
    """
    return _get_consistent_execution_path(job.hostqueueentry_set.all())
Allen Liff7064f2017-09-13 15:11:31 -0700320
321
def _get_consistent_execution_path(execution_entries):
    """Return the execution path shared by all the given entries.

    @param execution_entries: non-empty iterable of objects with an
            execution_path() method
    @returns: the common execution path
    @raises _ExecutionPathError: if any entry's path differs from the
            path of the first entry
    @raises IndexError: if execution_entries is empty
    """
    # Materialize once so that every comparison and the error message
    # below work on the same snapshot.  Callers in this module pass
    # querysets; presumably each index or slice of an unevaluated
    # queryset would otherwise be a separate query.
    entries = list(execution_entries)
    first_entry = entries[0]
    first_execution_path = first_entry.execution_path()
    for execution_entry in entries[1:]:
        execution_path = execution_entry.execution_path()
        if execution_path != first_execution_path:
            raise _ExecutionPathError(
                    '%s (%s) != %s (%s)'
                    % (execution_path,
                       execution_entry,
                       first_execution_path,
                       first_entry))
    return first_execution_path
333
334
class _ExecutionPathError(Exception):
    """Raised by _get_consistent_execution_path().

    Signals that the entries do not all share one execution path.
    """
337
338
class Drone(object):
    """Simplified drone API.

    This base class only documents the interface; its methods do
    nothing and return None.
    """

    def hostname(self):
        """Return the hostname of the drone."""

    def run(self, path, args):
        """Run a command synchronously.

        path must be an absolute path.  path may be on a remote machine.
        args is a list of arguments.

        The process may or may not have its own session.  The process
        should be short-lived.  It should not try to obtain a
        controlling terminal.

        The new process will have stdin, stdout, and stderr opened to
        /dev/null.

        This method intentionally has a very restrictive API.  It should
        be used to perform setup local to the drone, when the drone may
        be a remote machine.
        """

    def spawn(self, path, args, output_file):
        """Spawn an independent process.

        path must be an absolute path.  path may be on a remote machine.
        args is a list of arguments.

        The process is spawned in its own session.  It should not try to
        obtain a controlling terminal.

        The new process will have stdin opened to /dev/null and stdout,
        stderr opened to output_file.

        output_file is a pathname, but how it is interpreted is
        implementation defined, e.g., it may be a remote file.
        """
378
379
class LocalDrone(Drone):
    """Local implementation of Drone."""

    def hostname(self):
        """Return the hostname of the local machine."""
        return socket.gethostname()

    def run(self, path, args):
        """Run a command and wait for it, with all stdio on /dev/null.

        The exit status of the command is ignored.
        """
        with open(os.devnull, 'r+b') as null:
            subprocess.call([path] + args, stdin=null,
                            stdout=null, stderr=null)

    def spawn(self, path, args, output_file):
        """Spawn a detached local process; see _spawn()."""
        _spawn(path, [path] + args, output_file)
393
394
class RemoteDrone(Drone):
    """Remote implementation of Drone through SSH."""

    def __init__(self, host):
        """Initialize instance.

        @param host: SSHHost instance for the drone
        @raises TypeError: if host is not an SSHHost
        """
        if not isinstance(host, ssh_host.SSHHost):
            raise TypeError('RemoteDrone must be passed an SSHHost')
        self._host = host

    def hostname(self):
        """Return the hostname of the wrapped host."""
        return self._host.hostname

    def run(self, path, args):
        """Run a command on the host with all stdio on /dev/null.

        Each of path and args is shell-quoted before being joined into
        the command line.
        """
        cmd_parts = [path] + args
        safe_cmd = ' '.join(pipes.quote(part) for part in cmd_parts)
        self._host.run('%(cmd)s <%(null)s >%(null)s 2>&1'
                       % {'cmd': safe_cmd, 'null': os.devnull})

    def spawn(self, path, args, output_file):
        """Start a background command on the host.

        Each of path, args and output_file is shell-quoted.  stdout and
        stderr are appended to output_file on the host.
        """
        cmd_parts = [path] + args
        safe_cmd = ' '.join(pipes.quote(part) for part in cmd_parts)
        safe_file = pipes.quote(output_file)
        # SSH creates a session for each command, so we do not have to
        # do it.
        self._host.run('%(cmd)s <%(null)s >>%(file)s 2>&1 &'
                       % {'cmd': safe_cmd,
                          'file': safe_file,
                          'null': os.devnull})
422
423
def _spawn(path, argv, output_file):
    """Spawn a new process in its own session.

    path must be an absolute path.  The first item in argv should be
    path.

    In the calling process, this function returns on success.
    The forked process puts itself in its own session and execs.

    The new process will have stdin opened to /dev/null and stdout,
    stderr opened to output_file.

    A forked child never returns from this function: if anything fails
    before or during the exec, the traceback is printed to the child's
    stderr and the child exits with status 1.
    """
    logger.info('Spawning %r, %r, %r', path, argv, output_file)
    assert all(isinstance(arg, basestring) for arg in argv)
    pid = os.fork()
    if pid:
        # Parent: reap the short-lived intermediate child.
        os.waitpid(pid, 0)
        return
    # Everything below runs in a forked child.  It must end in exec or
    # os._exit(); letting an exception propagate would leave a copy of
    # the caller running on the caller's stack.
    try:
        # Double fork to reparent to init since monitor_db does not reap.
        if os.fork():
            os._exit(os.EX_OK)
        os.setsid()
        null_fd = os.open(os.devnull, os.O_RDONLY)
        os.dup2(null_fd, 0)
        os.close(null_fd)
        out_fd = os.open(output_file,
                         os.O_WRONLY | os.O_APPEND | os.O_CREAT)
        os.dup2(out_fd, 1)
        os.dup2(out_fd, 2)
        os.close(out_fd)
        os.execv(path, argv)
    except BaseException:
        # Goes to output_file if stderr was already redirected.
        traceback.print_exc()
    finally:
        # Only reached when the exec did not happen.
        os._exit(1)