blob: 37e073c4d0d43f1a6953576f17643dd3fe6791e1 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
30from chromite.lib import metrics
31
# results_mocker is optional: when the import fails the name is bound to None
# so that the rest of the module can still refer to it.
try:
    from autotest_lib.puppylab import results_mocker
except ImportError:
    results_mocker = None

# Shorthand for the global configuration object used throughout this module.
_CONFIG = global_config.global_config

# Value of AUTOSERV/require_atfork_module from the global config.
# NOTE(review): this lookup defaults to True while the lookup in the
# ImportError handler below defaults to False -- confirm which is intended.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

# Patch os.fork and the logging module through atfork at import time. If
# atfork cannot be imported, exit only when the configuration requires it.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # The hint goes to stderr; the ImportError itself is printed to stdout.
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000062
Kevin Cheng9b6930f2016-07-20 14:57:15 -070063from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000064from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000065from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070066from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070067from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070068from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070069from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070070from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070071from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070072from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000073from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000074
Paul Hobbs20cc72a2016-08-30 16:57:05 -070075
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with the board name and the servo port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod.
STOP_SERVOD_CMD = 'sudo stop servod'
83
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raise SystemExit: Always, with exit status 1.
    """
    # The process exits right after logging, so the message must not claim
    # that the signal is being ignored.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080087
Dan Shicf4d2032015-03-12 15:04:21 -070088
89def _get_machines(parser):
90 """Get a list of machine names from command line arg -m or a file.
91
92 @param parser: Parser for the command line arguments.
93
94 @return: A list of machine names from command line arg -m or the
95 machines file specified in the command line arg -M.
96 """
97 if parser.options.machines:
98 machines = parser.options.machines.replace(',', ' ').strip().split()
99 else:
100 machines = []
101 machines_file = parser.options.machines_file
102 if machines_file:
103 machines = []
104 for m in open(machines_file, 'r').readlines():
105 # remove comments, spaces
106 m = re.sub('#.*', '', m).strip()
107 if m:
108 machines.append(m)
109 logging.debug('Read list of machines from file: %s', machines_file)
110 logging.debug('Machines: %s', ','.join(machines))
111
112 if machines:
113 for machine in machines:
114 if not machine or re.search('\s', machine):
115 parser.parser.error("Invalid machine: %s" % str(machine))
116 machines = list(set(machines))
117 machines.sort()
118 return machines
119
120
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv run.

    The staging itself is delegated to the control segment
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the list
    of machines and the build to stage. The detail implementation could be
    different for each host type. Currently, only CrosHost has
    stage_server_side_package function defined.
    The script returns None if no server-side package is available. However,
    it may raise exception if it failed for reasons other than artifact (the
    server-side package) not found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    ssp_machines = _get_machines(parser)
    lab = parser.options.lab
    if lab:
        ssp_machines = server_job.get_machine_dicts(
                ssp_machines, lab, parser.options.host_attributes)

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image
    control_globals = {'machines': ssp_machines, 'image': source_build}
    # The control segment leaves its outputs in this dictionary.
    control_results = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, control_globals,
             control_results)
    return control_results['ssp_url'], control_results['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700154
155
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is adapted
    to the paths inside the container and run there, and the container is
    destroyed afterwards regardless of the outcome.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the run inside the
                      container raised; a FAIL entry is recorded on the job
                      before re-raising.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # The listing is done in a helper that tolerates a missing debug
        # folder, so that a failed listing can neither replace the exception
        # re-raised below nor skip the FAIL record.
        if not _list_debug_files(results):
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting fails, otherwise the
            # container would be leaked.
            test_container.destroy()


def _list_debug_files(results):
    """List the entries of the 'debug' folder inside the results folder.

    @param results: Path to the results folder. May be None.

    @return: A list of entry names in <results>/debug. Empty list if results
             is not set or the folder can not be listed.
    """
    if not results:
        return []
    try:
        return os.listdir(os.path.join(results, 'debug'))
    except OSError:
        return []
254
255
def correct_results_folder_permission(results):
    """Hand the results folder over to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. Owner and group are changed recursively to the uid and gid
    of the current process, so parsing job can access the results folder.
    Nothing is done when no results folder is given.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder.

    @raise error.CmdError: If one of the sudo commands failed. The failure is
                           posted to the metadata db before re-raising.
    """
    if results:
        # Each entry pairs a command template with the function providing
        # the id to apply; ids are looked up right before the command runs.
        ownership_cmds = (
                ('sudo -n chown -R %s "%s"', os.getuid),
                ('sudo -n chgrp -R %s "%s"', os.getgid),
        )
        try:
            for cmd_template, get_id in ownership_cmds:
                utils.run(cmd_template % (get_id(), results))
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700281
282
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: every failure is logged and the function returns
    without raising, so the job can carry on without servod.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host entry there are no servo attributes to read.
            logging.error('Starting servod is aborted. Host %s is not found '
                          'in AFE.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        # Use the module level constant rather than repeating the command.
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
329
330
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Sets up stdin, the process group and the signal handlers, builds the
    server job from the command line options, runs the requested task and
    finally exits the process with the resulting exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: Always; the exit code is 0 on success and 1 if the job
                       raised any exception.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def correct_permission_best_effort():
        """Correct the results folder ownership without raising.

        Used by the SIGTERM handler only: a failing sudo command must not
        stop the handler before the container is destroyed and the process
        group is killed.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_permission_best_effort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_permission_best_effort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    try:
        with site_utils.SetupTsMonGlobalState('autoserv', indirect=True,
                                              short_lived=True):
            try:
                if repair:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    job.repair(job_labels)
                elif verify:
                    job.verify(job_labels)
                elif provision:
                    job.provision(job_labels)
                elif reset:
                    job.reset(job_labels)
                elif cleanup:
                    job.cleanup(job_labels)
                else:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    if use_ssp:
                        try:
                            _run_with_ssp(job, container_name, job_or_task_id,
                                          results, parser, ssp_url, job_folder,
                                          machines)
                        finally:
                            # Update the ownership of files in result folder.
                            correct_results_folder_permission(results)
                    else:
                        if collect_crashinfo:
                            # Update the ownership of files in result folder. If the
                            # job to collect crashinfo was running inside container
                            # (SSP) and crashed before correcting folder permission,
                            # the result folder might have wrong permission setting.
                            try:
                                correct_results_folder_permission(results)
                            except:
                                # Ignore any error as the user may not have root
                                # permission to run sudo command.
                                pass
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
            finally:
                # Close every host of the job, whatever the outcome was.
                while job.hosts:
                    host = job.hosts.pop()
                    host.close()
    except:
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000583
584
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = []
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Names of the special-task options that are switched on.
    enabled_tasks = [task for task in task_mapping
                     if getattr(options, task, False) == True]
    if enabled_tasks:
        status = task_mapping[enabled_tasks[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
615
616
def main():
    """Parse the autoserv command line, set up results/logging, run the job.

    Steps, in order:
      * parse arguments (print help and exit with 1 when none were given);
      * stage the server-side package (SSP) when it is required;
      * resolve and create the results directory unless logging is disabled;
      * configure logging, then emit the messages that had to wait for it;
      * open the pid file if requested;
      * run either the results mocker (testing mode) or run_autoserv().

    The pid file is closed and the end-to-end duration is recorded in a
    `finally` clause. Afterwards the process exits with the job's exit code;
    testing mode returns from this function instead of calling sys.exit().
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side
    # packaging. If option warn_no_ssp is specified, that means autoserv is
    # running in a drone that does not support SSP, thus no need to stage
    # server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    # Bound on every path so the later existing-results check can never hit
    # an undefined name.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these marker files means a previous job used this directory.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg (not `error`) so the imported `error` module is
            # not shadowed inside this function.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file or
    # no machines, so the testing-mode branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')
            test_name = None

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs before returning.
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000787
mblighbb421852008-03-11 22:36:16 +0000788
# Script entry point: only run autoserv when this file is executed directly.
if __name__ == '__main__':
    main()