blob: 66ee79522301c9982f4c08c1247a00fde29b456f [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
Dan Shi5e2efb72017-02-07 11:40:23 -080030try:
31 from chromite.lib import metrics
32except ImportError:
33 metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080035try:
36 from autotest_lib.puppylab import results_mocker
37except ImportError:
38 results_mocker = None
39
Dan Shia06f3e22015-09-03 16:15:15 -070040_CONFIG = global_config.global_config
41
42require_atfork = _CONFIG.get_config_value(
mblighcb8cb332009-09-03 21:08:56 +000043 'AUTOSERV', 'require_atfork_module', type=bool, default=True)
44
Dan Shia1ecd5c2013-06-06 11:21:31 -070045
Jakob Jueliche497b552014-09-23 19:11:59 -070046# Number of seconds to wait before returning if testing mode is enabled
Prashanth B6285f6a2014-05-08 18:01:27 -070047TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070048
# Make os.fork() safe to use together with threads and the logging module.
# atfork is an optional external package: when it cannot be imported, autoserv
# only aborts if the AUTOSERV/require_atfork_module config value asks for it.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Keep the hint and the import failure together on stderr so that
        # both show up in the same stream.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000065
Kevin Cheng9b6930f2016-07-20 14:57:15 -070066from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000067from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000068from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070069from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070070from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070071from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070072from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070073from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070074from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070075from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000076from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000077
Paul Hobbs20cc72a2016-08-30 16:57:05 -070078
Dan Shicf4d2032015-03-12 15:04:21 -070079# Control segment to stage server-side package.
80STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
81 'stage_server_side_package')
82
Dan Shia06f3e22015-09-03 16:15:15 -070083# Command line to start servod in a moblab.
84START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
85STOP_SERVOD_CMD = 'sudo stop servod'
86
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the signal that was delivered (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler does not resume the interrupted work, so the message must
    # say that the process is exiting rather than continuing.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080090
Dan Shicf4d2032015-03-12 15:04:21 -070091
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    When a machines file (-M) is given, its content replaces whatever was
    passed with -m. In the file, everything after a '#' is a comment and
    blank lines are skipped.

    @param parser: Parser for the command line arguments. parser.options must
            provide `machines` and `machines_file`; parser.parser.error is
            called with a message for any machine name containing whitespace.

    @return: A sorted list of unique machine names from command line arg -m
            or the machines file specified in the command line arg -M.
    """
    options = parser.options
    if options.machines:
        # Names may be separated by commas and/or whitespace.
        machines = options.machines.replace(',', ' ').split()
    else:
        machines = []

    machines_file = options.machines_file
    if machines_file:
        # Use a context manager so the file handle is closed promptly.
        with open(machines_file, 'r') as machines_fd:
            stripped = [re.sub('#.*', '', line).strip()
                        for line in machines_fd]
        machines = [name for name in stripped if name]
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    for machine in machines:
        # Empty names cannot occur here (both sources drop them), so only
        # embedded whitespace needs to be rejected.
        if re.search(r'\s', machine):
            parser.parser.error("Invalid machine: %s" % machine)
    return sorted(set(machines))
122
123
def _stage_ssp(parser):
    """Stage the server-side package (SSP) for this autoserv run.

    The staging itself is delegated to the control segment
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the list of
    machines and the image to stage. The detail implementation could be
    different for each host type. Currently, only CrosHost has
    stage_server_side_package function defined.
    The control segment yields no url when no server-side package is
    available. It may still raise if staging failed for a reason other than
    the artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
            process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    options = parser.options
    machine_dicts = server_job.get_machine_dicts(
            _get_machines(parser), options.lab, options.host_attributes)

    # Without an explicit test_source_build, the server-side test code comes
    # from the build given by --image.
    image = options.test_source_build or options.image

    script_globals = {'machines': machine_dicts, 'image': image}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)
    return script_locals['ssp_url'], script_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700155
156
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is adjusted
    (host paths replaced by container paths, host-only arguments dropped) and
    executed inside the container. The container is destroyed afterwards,
    whether or not the run succeeded.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Any failure from container setup or from the run
            inside the container is re-raised after being recorded.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop the flag together with its value. Slicing keeps this safe even
        # if the flag happens to be the last argument.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # results may be None and the debug folder may not exist; check
        # before listing so this diagnostic never masks the real failure.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_logs = bool(debug_dir and os.path.isdir(debug_dir) and
                              os.listdir(debug_dir))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting fails, so it is not
            # leaked.
            test_container.destroy()
255
256
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. The owner and group are switched to the ones of the
    current process, so parsing job can access the results folder. A failing
    command is reported to the metadata db before the error is re-raised.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
            empty or None.

    @raises error.CmdError: If changing owner or group fails.
    """
    if results:
        ownership_cmds = (
                'sudo -n chown -R %s "%s"' % (os.getuid(), results),
                'sudo -n chgrp -R %s "%s"' % (os.getgid(), results),
        )
        try:
            for ownership_cmd in ownership_cmds:
                utils.run(ownership_cmd)
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700282
283
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the AFE cannot be queried, when
    the host is unknown to the AFE, or when the dut's servo_host attribute
    does not point at the local machine. Failures to start servod are logged
    and not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line; join them into a single comma
        # separated pid list so the ps command stays one shell command.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
330
331
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Sets up stdin, process group and signal handlers, builds the server job
    from the parsed command line, runs the requested special task or the
    control file (optionally inside a server-side packaging container), and
    finally exits the process with the job's exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always; status is 0 on success and 1 if the job raised.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up SSP state and kill the process group."""

        def fix_results_permission():
            """Correct results ownership; a failure must not stop the abort."""
            try:
                correct_results_folder_permission(results)
            except Exception:
                logging.exception('Failed to correct permission of results '
                                  'folder %s.', results)

        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                fix_results_permission()

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged
            # before the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except Exception:
                    metadata['success'] = False
                    metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.exception('Failed to destroy container %s.',
                                      container_name)
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    fix_results_permission()
        finally:
            # Whatever went wrong above, the abort must still take effect.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except Exception:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catch everything, including SystemExit raised by the
        # SIGALRM handler, so the pid file below always gets the exit code.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000587
588
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded against the pseudo host 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    hostnames = []
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if len(hostnames) == 0:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    status_enum = job_overhead.STATUS
    task_mapping = {
            'reset': status_enum.RESETTING, 'verify': status_enum.VERIFYING,
            'provision': status_enum.PROVISIONING,
            'repair': status_enum.REPAIRING,
            'cleanup': status_enum.CLEANING,
            'collect_crashinfo': status_enum.GATHERING}
    # Only options that compare equal to True count as a requested task.
    requested_tasks = [task for task in task_mapping
                       if getattr(options, task, False) == True]
    if requested_tasks:
        status = task_mapping[requested_tasks[0]]
    else:
        status = status_enum.RUNNING
    is_special_task = status not in [status_enum.RUNNING,
                                     status_enum.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
619
620
def main():
    """Entry point: parse arguments, set up results/logging, run autoserv.

    The steps are, in order: optionally stage the server-side package (SSP),
    prepare the results directory, configure logging, open the pid file,
    work out the test name from the control file, and finally either mock
    the results (testing mode) or call run_autoserv().

    Once the run itself has started, the function always ends with
    sys.exit(exit_code) after closing the pid file and recording the
    end-to-end duration via record_autoserv(). Earlier validation failures
    exit with status 1.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage the
    # server-side package first. If that fails with an error that the
    # autotest server package does not exist, fall back to running the job
    # without server-side packaging. If option warn_no_ssp is specified, that
    # means autoserv is running in a drone that does not support SSP, thus
    # there is no need to stage the server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    # Bound up front: the use_existing_results check further down reads this
    # even when options.no_logging skips the results directory handling.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as holding a previous run.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Logging is not configured yet, so report on stderr directly.
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the
    # package is available. If warn_no_ssp is specified, it means that
    # autoserv is running in a drone that does not support SSP and a warning
    # will be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The '
                'test will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
    if (parser.options.use_existing_results and not resultdir_exists and
        not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file,
    # no machine, or the control file cannot be parsed, so the testing-mode
    # branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
        parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a
    # test for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode needs the results mocker to be importable and is turned
    # off for any job whose label contains one of the testing exceptions.
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just
                # return happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs and exits the process.
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
        sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000791
mblighbb421852008-03-11 22:36:16 +0000792
# Run autoserv only when this file is executed as a script.
if __name__ == '__main__':
    main()