blob: 6cc107d2ba823eb2befb0c7e70edf9ea0831fdda [file] [log] [blame]
Allen Liff7064f2017-09-13 15:11:31 -07001# Copyright 2017 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Library providing an API to lucifer."""
6
7import os
Allen Li5a3ed352017-11-13 15:49:20 -08008import logging
Allen Liff7064f2017-09-13 15:11:31 -07009import pipes
Allen Lib07ab0f2018-01-26 17:39:47 -080010import socket
Allen Liff7064f2017-09-13 15:11:31 -070011
12import common
13from autotest_lib.client.bin import local_host
14from autotest_lib.client.common_lib import global_config
15from autotest_lib.server.hosts import ssh_host
Allen Li68237232018-02-16 11:13:24 -080016from autotest_lib.frontend.afe import models
Allen Liff7064f2017-09-13 15:11:31 -070017
# Shared handle on the global autotest configuration.
_config = global_config.global_config
# Name of the config section that every lookup in this module reads from.
_SECTION = 'LUCIFER'

# TODO(crbug.com/748234): Move these to shadow_config.ini
# See also drones.AUTOTEST_INSTALL_DIR
# env(1) is the program actually spawned on the drone; job_reporter is
# passed to it as an argument so that VAR=value assignments can be
# prepended to the argument list.
_ENV = '/usr/bin/env'
_AUTOTEST_DIR = '/usr/local/autotest'
_JOB_REPORTER_PATH = os.path.join(_AUTOTEST_DIR, 'bin', 'job_reporter')

logger = logging.getLogger(__name__)
Allen Liff7064f2017-09-13 15:11:31 -070028
29
def is_lucifer_enabled():
    """Report whether lucifer is enabled.

    Lucifer is unconditionally enabled; no config value is consulted.

    @returns: always True
    """
    return True
Allen Liff7064f2017-09-13 15:11:31 -070033
34
def is_enabled_for(level):
    """Tell whether lucifer is enabled for exactly the given level.

    The comparison against the configured 'lucifer_level' ignores case.

    @param level: string, e.g. 'PARSING', 'GATHERING'
    @returns: True if lucifer is enabled and the configured level
            matches `level`, else False
    """
    if is_lucifer_enabled():
        configured = _config.get_config_value(_SECTION, 'lucifer_level')
        configured = configured.upper()
        return level.upper() == configured
    return False
45
46
def is_lucifer_owned(job):
    """Tell whether a job has already been handed off to lucifer.

    A job counts as handed off when it exposes a `jobhandoff` attribute.

    @param job: Job instance
    @returns: bool
    """
    handed_off = hasattr(job, 'jobhandoff')
    return handed_off
Allen Liff7064f2017-09-13 15:11:31 -070050
51
def is_split_job(hqe_id):
    """Return True if the HQE's job has HQEs in different groups.

    For example, if the given HQE has execution_subdir=foo and the job
    also has an HQE with execution_subdir=bar, return True.  The only
    situation where this happens is when provisioning fails in a
    multi-DUT job; each HQE then ends up in its own group.

    See https://bugs.chromium.org/p/chromium/issues/detail?id=811877

    @param hqe_id: HQE id
    @returns: True if the job's HQEs do not all share one execution
            path, else False
    """
    job = models.HostQueueEntry.objects.get(id=hqe_id).job
    try:
        _get_consistent_execution_path(job.hostqueueentry_set.all())
    except _ExecutionPathError:
        # Disagreeing execution paths are exactly what "split" means.
        return True
    else:
        return False
71
72
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_starting_job_handler(manager, job):
    """Spawn job_reporter to handle a job from the starting stage.

    Not implemented yet; calling this always raises.

    Pass all arguments by keyword.

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @returns: Drone instance
    @raises NotImplementedError: always
    """
    raise NotImplementedError()
85
86
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_gathering_job_handler(manager, job, autoserv_exit, pidfile_id=None):
    """Spawn job_reporter to handle a job that still needs gathering.

    job_reporter is started through env(1) on the chosen drone with its
    output appended to job_reporter_output.log in the job's results
    directory.

    Pass all arguments by keyword.

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @param autoserv_exit: autoserv exit status
    @param pidfile_id: PidfileId instance.  If None, the manager picks
            a drone and the number of failed tests is reported as -1
            (missing), since there is no pidfile to read it from.
    @returns: Drone instance
    """
    manager = _DroneManager(manager)
    if pidfile_id is None:
        drone = manager.pick_drone_to_use()
    else:
        drone = manager.get_drone_for_pidfile(pidfile_id)
    results_dir = _results_dir(manager, job)
    if pidfile_id is None:
        # No autoserv pidfile was given, so do not ask the manager to
        # look one up; -1 is the value used for a missing count.
        num_tests_failed = -1
    else:
        num_tests_failed = manager.get_num_tests_failed(pidfile_id)
    args = [
            _JOB_REPORTER_PATH,

            # General configuration
            '--jobdir', _get_jobdir(),
            '--run-job-path', _get_run_job_path(),
            '--watcher-path', _get_watcher_path(),

            # Job specific
            '--job-id', str(job.id),
            '--lucifer-level', 'GATHERING',
            '--autoserv-exit', str(autoserv_exit),
            '--need-gather',
            '--num-tests-failed', str(num_tests_failed),
            '--results-dir', results_dir,
    ]
    gcp_creds = _get_gcp_creds()
    if gcp_creds:
        # env(1) takes leading VAR=value arguments as environment
        # assignments.  The value must not be shell quoted here:
        # LocalDrone execs the argument list directly and RemoteDrone
        # quotes every argument itself, so quoting at this point would
        # put literal quote characters into the variable.
        args = ['GOOGLE_APPLICATION_CREDENTIALS=%s' % gcp_creds] + args
    output_file = os.path.join(results_dir, 'job_reporter_output.log')
    drone.spawn(_ENV, args, output_file=output_file)
    return drone
Allen Lib8b2e592017-12-14 17:41:40 -0800131
132
# TODO(crbug.com/748234): This is temporary to enable toggling
# lucifer rollouts with an option.
def spawn_parsing_job_handler(manager, job, autoserv_exit, pidfile_id=None):
    """Spawn job_reporter to handle a job that is ready for parsing.

    job_reporter is started through env(1) on the chosen drone with its
    output appended to job_reporter_output.log in the job's results
    directory.

    Pass all arguments by keyword.

    @param manager: scheduler.drone_manager.DroneManager instance
    @param job: Job instance
    @param autoserv_exit: autoserv exit status
    @param pidfile_id: PidfileId instance used to select the drone.  If
            None, the manager picks a drone.
    @returns: Drone instance
    """
    manager = _DroneManager(manager)
    if pidfile_id is None:
        drone = manager.pick_drone_to_use()
    else:
        drone = manager.get_drone_for_pidfile(pidfile_id)
    results_dir = _results_dir(manager, job)
    args = [
            _JOB_REPORTER_PATH,

            # General configuration
            '--jobdir', _get_jobdir(),
            '--run-job-path', _get_run_job_path(),
            '--watcher-path', _get_watcher_path(),

            # Job specific
            '--job-id', str(job.id),
            # NOTE(review): the level is GATHERING here as well; the
            # difference from the gathering handler is that
            # --need-gather and --num-tests-failed are not passed.
            # Presumably intentional -- confirm against job_reporter.
            '--lucifer-level', 'GATHERING',
            '--autoserv-exit', str(autoserv_exit),
            '--results-dir', results_dir,
    ]
    gcp_creds = _get_gcp_creds()
    if gcp_creds:
        # env(1) takes leading VAR=value arguments as environment
        # assignments.  The value must not be shell quoted here:
        # LocalDrone execs the argument list directly and RemoteDrone
        # quotes every argument itself, so quoting at this point would
        # put literal quote characters into the variable.
        args = ['GOOGLE_APPLICATION_CREDENTIALS=%s' % gcp_creds] + args
    output_file = os.path.join(results_dir, 'job_reporter_output.log')
    drone.spawn(_ENV, args, output_file=output_file)
    return drone
Allen Liff7064f2017-09-13 15:11:31 -0700174
175
def _get_jobdir():
    """Return the 'jobdir' value from the lucifer config section."""
    jobdir = _config.get_config_value(_SECTION, 'jobdir')
    return jobdir
Allen Li057be2c2017-11-08 13:51:24 -0800178
179
def _get_run_job_path():
    """Return the path to lucifer_run_job inside the binaries dir."""
    binaries_dir = _get_binaries_path()
    return os.path.join(binaries_dir, 'lucifer_run_job')
182
183
def _get_watcher_path():
    """Return the path to lucifer_watcher inside the binaries dir."""
    binaries_dir = _get_binaries_path()
    return os.path.join(binaries_dir, 'lucifer_watcher')
186
187
def _get_binaries_path():
    """Return the 'binaries_path' value from the lucifer config section."""
    binaries_dir = _config.get_config_value(_SECTION, 'binaries_path')
    return binaries_dir
191
192
def _get_gcp_creds():
    """Return the configured path to GCP service account credentials.

    Falls back to the empty string when 'gcp_creds' is not configured,
    which callers treat as "no credentials".
    """
    creds_path = _config.get_config_value(
            _SECTION, 'gcp_creds', default='')
    return creds_path
Allen Li057be2c2017-11-08 13:51:24 -0800199
200
class _DroneManager(object):
    """Thin adapter exposing a simplified API over an old style manager.

    Drones handed out by this class are wrapped with _wrap_drone().
    """

    def __init__(self, old_manager):
        """Wrap an existing manager.

        @param old_manager: old style DroneManager
        """
        self._manager = old_manager

    def get_num_tests_failed(self, pidfile_id):
        """Look up how many tests autoserv reported as failed.

        @param pidfile_id: PidfileId instance.
        @returns: int (-1 if the pidfile contents carry no count)
        """
        contents = self._manager.get_pidfile_contents(pidfile_id)
        failed = contents.num_tests_failed
        return -1 if failed is None else failed

    def get_drone_for_pidfile(self, pidfile_id):
        """Return the wrapped drone associated with a pidfile.

        @param pidfile_id: PidfileId instance.
        """
        old_drone = self._manager.get_drone_for_pidfile_id(pidfile_id)
        return _wrap_drone(old_drone)

    def pick_drone_to_use(self, num_processes=1, prefer_ssp=False):
        """Let the underlying manager choose a drone and wrap it.

        Both options are forwarded unchanged to the old manager.

        @param num_processes: number of processes the drone is intended
                to run
        @param prefer_ssp: indicates whether drones supporting
                server-side packaging should be preferred.  The returned
                drone is not guaranteed to support it.
        """
        return _wrap_drone(self._manager.pick_drone_to_use(
                num_processes=num_processes, prefer_ssp=prefer_ssp))

    def absolute_path(self, path):
        """Resolve a results path to an absolute one.

        The returned path might be remote.
        """
        resolved = self._manager.absolute_path(path)
        return resolved
252
253
def _wrap_drone(old_drone):
    """Convert an old style drone into this module's Drone type.

    The choice is made from the type of the old drone's host.

    @param old_drone: old style drone with a `_host` attribute
    @returns: LocalDrone for a LocalHost, RemoteDrone for an SSHHost
    @raises TypeError: if the host is of neither type
    """
    drone_host = old_drone._host
    if isinstance(drone_host, local_host.LocalHost):
        return LocalDrone()
    if isinstance(drone_host, ssh_host.SSHHost):
        return RemoteDrone(drone_host)
    raise TypeError('Drone has an unknown host type')
263
264
def _results_dir(manager, job):
    """Compute the absolute results directory of a job.

    Path may be on a remote host.

    @param manager: object providing absolute_path()
    @param job: Job instance
    """
    relative_dir = _working_directory(job)
    return manager.absolute_path(relative_dir)
271
272
def _working_directory(job):
    """Return the execution path shared by all of the job's HQEs.

    @param job: Job instance
    @raises _ExecutionPathError: if the HQEs disagree on the path
    """
    entries = job.hostqueueentry_set.all()
    return _get_consistent_execution_path(entries)
Allen Liff7064f2017-09-13 15:11:31 -0700275
276
def _get_consistent_execution_path(execution_entries):
    """Return the execution path that all given entries agree on.

    @param execution_entries: iterable of objects with an
            execution_path() method.  Callers in this file pass
            hostqueueentry_set.all().
    @returns: the common execution path
    @raises IndexError: if there are no entries
    @raises _ExecutionPathError: if any entry's execution path differs
            from the first entry's
    """
    # Materialize once instead of indexing and slicing the argument
    # repeatedly.  Presumably the argument is a lazy Django QuerySet,
    # where each index/slice may issue its own query -- confirm.  This
    # also lets plain iterators be passed.
    entries = list(execution_entries)
    first_entry = entries[0]
    first_execution_path = first_entry.execution_path()
    for execution_entry in entries[1:]:
        execution_path = execution_entry.execution_path()
        if execution_path != first_execution_path:
            raise _ExecutionPathError(
                    '%s (%s) != %s (%s)'
                    % (execution_path,
                       execution_entry,
                       first_execution_path,
                       first_entry))
    return first_execution_path
288
289
class _ExecutionPathError(Exception):
    """Signals that execution entries do not share one execution path.

    Raised by _get_consistent_execution_path().
    """
292
293
class Drone(object):
    """Interface of the simplified drone API.

    The methods here carry documentation only and return None;
    subclasses provide the behavior.
    """

    def hostname(self):
        """Return the hostname of the drone."""

    def spawn(self, path, args, output_file):
        """Start an independent process on the drone.

        The process is spawned in its own session.  It should not try
        to obtain a controlling terminal.

        The new process will have stdin opened to /dev/null and stdout,
        stderr opened to output_file.

        @param path: absolute path of the program; may be on a remote
                machine
        @param args: list of arguments
        @param output_file: pathname for stdout and stderr; how it is
                interpreted is implementation defined, e.g., it may be
                a remote file
        """
315
316
class LocalDrone(Drone):
    """Drone that runs processes on the current machine."""

    def hostname(self):
        """Return the hostname of the local machine."""
        local_name = socket.gethostname()
        return local_name

    def spawn(self, path, args, output_file):
        """Fork and exec the program locally; see Drone.spawn()."""
        # The first item of argv is the program path itself.
        argv = [path] + args
        _spawn(path, argv, output_file)
325
326
class RemoteDrone(Drone):
    """Drone that runs processes on another machine through SSH."""

    def __init__(self, host):
        """Initialize instance.

        @param host: SSHHost used to run commands on the drone
        @raises TypeError: if host is not an SSHHost
        """
        if not isinstance(host, ssh_host.SSHHost):
            raise TypeError('RemoteDrone must be passed an SSHHost')
        self._host = host

    def hostname(self):
        """Return the hostname of the wrapped host."""
        return self._host.hostname

    def spawn(self, path, args, output_file):
        """Run the program in the background on the host.

        Every command part and the output file name are shell quoted.
        See Drone.spawn().
        """
        quoted_parts = [pipes.quote(part) for part in [path] + args]
        substitutions = {
                'cmd': ' '.join(quoted_parts),
                'file': pipes.quote(output_file),
                'null': os.devnull,
        }
        # SSH creates a session for each command, so we do not have to
        # do it.
        command = '%(cmd)s <%(null)s >>%(file)s 2>&1 &' % substitutions
        self._host.run(command)
348
349
def _spawn(path, argv, output_file):
    """Spawn a new process in its own session.

    path must be an absolute path.  The first item in argv should be
    path.

    In the calling process, this function returns once the intermediate
    child has exited.  The forked process puts itself in its own session
    and execs.  If anything fails after the fork, the forked process
    logs the error and exits with status 1; it never returns into the
    caller's code.

    The new process will have stdin opened to /dev/null and stdout,
    stderr opened to output_file (appended to, created if missing).

    @param path: absolute path of the program to exec
    @param argv: list of strings, argv[0] should be path
    @param output_file: pathname receiving stdout and stderr
    """
    logger.info('Spawning %r, %r, %r', path, argv, output_file)
    assert all(isinstance(arg, basestring) for arg in argv)
    pid = os.fork()
    if pid:
        # Reap the intermediate child, which exits right after forking
        # the real process.
        os.waitpid(pid, 0)
        return
    # From here on we are a forked copy of the caller.  Never let an
    # exception or a normal return unwind back into the caller's code,
    # or there would be two copies of the calling process running.
    try:
        # Double fork to reparent to init since monitor_db does not reap.
        if os.fork():
            os._exit(os.EX_OK)
        os.setsid()
        null_fd = os.open(os.devnull, os.O_RDONLY)
        os.dup2(null_fd, 0)
        os.close(null_fd)
        # Pass an explicit mode: os.open() defaults to 0777, which would
        # create an executable log file.  0666 (before umask) matches
        # what a shell `>>` redirect creates.
        out_fd = os.open(output_file,
                         os.O_WRONLY | os.O_APPEND | os.O_CREAT,
                         0o666)
        os.dup2(out_fd, 1)
        os.dup2(out_fd, 2)
        os.close(out_fd)
        os.execv(path, argv)
    except BaseException:
        logger.exception('Failed to spawn %r', path)
    finally:
        # Only reached on failure; a successful execv never returns.
        os._exit(1)
379 os.execv(path, argv)