blob: f34712386732ec5bb87609ee7b3dc9b7b54ae3e2 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
30from chromite.lib import metrics
31
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080032try:
33 from autotest_lib.puppylab import results_mocker
34except ImportError:
35 results_mocker = None
36
# Shared handle on the global configuration object used throughout this file.
_CONFIG = global_config.global_config

# Value of AUTOSERV/require_atfork_module from the global config; True is
# passed as the default to get_config_value.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070045
# Patch os.fork and the logging module for threading+fork safety when the
# atfork package can be imported. If it cannot, abort only when the config
# option AUTOSERV/require_atfork_module is set.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # NOTE(review): this check passes default=False while the module-level
    # require_atfork value is read with default=True - confirm which default
    # is intended before unifying the two.
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Both diagnostics go to stderr so they stay together in the logs.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000062
Kevin Cheng9b6930f2016-07-20 14:57:15 -070063from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000064from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000065from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070066from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070067from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070068from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070069from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070070from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070071from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070072from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000073from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000074
Paul Hobbs20cc72a2016-08-30 16:57:05 -070075
# Control segment to stage server-side package. _stage_ssp executes this file
# and reads 'ssp_url' and 'error_msg' from the resulting locals.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with (board, servo port) by _start_servod.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod.
STOP_SERVOD_CMD = 'sudo stop servod'
83
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits rather than ignoring the alarm, so say so in the log.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080087
Dan Shicf4d2032015-03-12 15:04:21 -070088
89def _get_machines(parser):
90 """Get a list of machine names from command line arg -m or a file.
91
92 @param parser: Parser for the command line arguments.
93
94 @return: A list of machine names from command line arg -m or the
95 machines file specified in the command line arg -M.
96 """
97 if parser.options.machines:
98 machines = parser.options.machines.replace(',', ' ').strip().split()
99 else:
100 machines = []
101 machines_file = parser.options.machines_file
102 if machines_file:
103 machines = []
104 for m in open(machines_file, 'r').readlines():
105 # remove comments, spaces
106 m = re.sub('#.*', '', m).strip()
107 if m:
108 machines.append(m)
109 logging.debug('Read list of machines from file: %s', machines_file)
110 logging.debug('Machines: %s', ','.join(machines))
111
112 if machines:
113 for machine in machines:
114 if not machine or re.search('\s', machine):
115 parser.parser.error("Invalid machine: %s" % str(machine))
116 machines = list(set(machines))
117 machines.sort()
118 return machines
119
120
def _stage_ssp(parser):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the machines
    and image taken from the autoserv command line. How staging is done
    depends on the host type; currently only CrosHost defines
    stage_server_side_package.
    The script returns None if no server-side package is available. However,
    it may raise exception if it failed for reasons other than artifact (the
    server-side package) not found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    hosts = _get_machines(parser)
    if bool(parser.options.lab):
        # In the lab each machine is passed along with its AFE host object.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = [{'hostname': name,
                  'afe_host': afe.get_hosts(hostname=name)[0]}
                 for name in hosts]

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': hosts, 'image': source_build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)
    return script_locals['ssp_url'], script_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700158
159
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    its paths point inside the container, runs it there and destroys the
    container afterwards.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                                            stored in a temp folder.
                    results                 can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Any failure from setting up the container or running
                       the command in it is recorded in the job status and
                       re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop the flag together with the parent job id that follows it. The
        # slice tolerates the flag being the last argument.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # The debug folder is probed with explicit checks instead of a nested
        # try/except: results may be None and the folder may not exist, and a
        # nested handler would change what the bare `raise` below re-raises
        # under Python 2.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_logs = bool(debug_dir and os.path.isdir(debug_dir) and
                              os.listdir(debug_dir))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Always release the container, even if reporting fails.
            test_container.destroy()
258
259
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. Owner and group are changed recursively to those of the
    current process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done if it is
                    empty or None.

    @raises error.CmdError: If either sudo command fails; the failure is
                            posted to the metadata db before re-raising.
    """
    if not results:
        return

    try:
        chown_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(chown_cmd)
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(chgrp_cmd)
    except error.CmdError as cmd_error:
        failure_info = {'error': str(cmd_error),
                        'result_folder': results,
                        'drone': socket.gethostname()}
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure_info)
        raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700285
286
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the dut is unknown to the AFE, or
    when the dut's servo_host attribute is not the local machine. Failures to
    start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there is no servo configuration to use.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        attributes = hosts[0].attributes
        servo_host = attributes.get('servo_host', None)
        servo_port = attributes.get('servo_port', 9999)
        if servo_host not in ('localhost', '127.0.0.1'):
            logging.warn('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
333
334
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Redirects stdin, detaches into its own process group if needed, installs
    signal handlers, builds a server_job from the parsed options, runs either
    the requested special task or the control file, and exits the process.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always. The exit status is 0, or 1 if running the job
                        raised an exception.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def correct_permission_on_abort():
        """Fix results folder ownership without letting a failure escape.

        correct_results_folder_permission re-raises command failures. During
        an abort that must not stop the container from being destroyed or
        the process group from being killed.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the container and kill the process group.

        @param signum: Number of the received signal (unused).
        @param frame: Stack frame interrupted by the signal (unused).
        """
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                correct_permission_on_abort()

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged before
            # the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except:
                    metadata['success'] = False
                    metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.exception('Failed to destroy container %s.',
                                      container_name)
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    correct_permission_on_abort()
        finally:
            # Whatever happened above, the abort must end with the whole
            # process group being killed.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    # The execution tag falls back to the parse_job value when not given.
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    try:
        with site_utils.SetupTsMonGlobalState('autoserv', indirect=True,
                                              short_lived=True):
            try:
                if repair:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    job.repair(job_labels)
                elif verify:
                    job.verify(job_labels)
                elif provision:
                    job.provision(job_labels)
                elif reset:
                    job.reset(job_labels)
                elif cleanup:
                    job.cleanup(job_labels)
                else:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    if use_ssp:
                        try:
                            _run_with_ssp(job, container_name, job_or_task_id,
                                          results, parser, ssp_url, job_folder,
                                          machines)
                        finally:
                            # Update the ownership of files in result folder.
                            correct_results_folder_permission(results)
                    else:
                        if collect_crashinfo:
                            # Update the ownership of files in result folder. If the
                            # job to collect crashinfo was running inside container
                            # (SSP) and crashed before correcting folder permission,
                            # the result folder might have wrong permission setting.
                            try:
                                correct_results_folder_permission(results)
                            except Exception:
                                # Ignore any error as the user may not have root
                                # permission to run sudo command.
                                pass
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
            finally:
                # Close every host the job opened, whether it passed or failed.
                while job.hosts:
                    host = job.hosts.pop()
                    host.close()
    except:
        # Top-level boundary: report any failure through the exit status.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000587
588
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Option names whose value compares equal to True.
    requested = [task for task in task_mapping
                 if getattr(options, task, False) == True]
    if requested:
        status = task_mapping[requested[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]

    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
619
620
def main():
    """Parse the command line, set up results/logging and run autoserv.

    The function always terminates the process through sys.exit():
      - exit code 1 when no arguments are given, when the results
        directory already holds results and use_existing_results is not
        set, or when use_existing_results is set but no existing results
        are found (outside of a container);
      - otherwise the exit code produced by run_autoserv (1 for any
        unexpected exception, 0 when testing mode mocks the results).

    Before exiting, the pid file (if any) is closed with the exit code and
    the total run time is reported through record_autoserv().
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails because the autotest
    # server package does not exist, fall back to running the job without
    # server-side packaging. If option warn_no_ssp is specified, autoserv is
    # running in a drone that does not support SSP, thus there is no need to
    # stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have an autotest server package. Fall back to
        # not using the server-side package. Logging is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Defined up front so the use_existing_results check below never hits an
    # unbound name when logging (and thus the results dir) is disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the
    # package is available. If warn_no_ssp is specified, it means that
    # autoserv is running in a drone that does not support SSP and a warning
    # will be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The '
                'test will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file
    # or no machines, so the testing-mode branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
        parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')
            test_name = None

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a
    # test for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just
                # return happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs and exits with
                # exit_code (0 at this point).
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
        sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000791
mblighbb421852008-03-11 22:36:16 +0000792
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()