blob: 9b2f5fb64de9cf6db6d7ffe1149e4cb4dd0d510c [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
30from chromite.lib import metrics
31
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080032try:
33 from autotest_lib.puppylab import results_mocker
34except ImportError:
35 results_mocker = None
36
Dan Shia06f3e22015-09-03 16:15:15 -070037_CONFIG = global_config.global_config
38
39require_atfork = _CONFIG.get_config_value(
mblighcb8cb332009-09-03 21:08:56 +000040 'AUTOSERV', 'require_atfork_module', type=bool, default=True)
41
Dan Shia1ecd5c2013-06-06 11:21:31 -070042
Jakob Jueliche497b552014-09-23 19:11:59 -070043# Number of seconds to wait before returning if testing mode is enabled
Prashanth B6285f6a2014-05-08 18:01:27 -070044TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070045
# Make os.fork() safe to use together with threads and the logging module's
# internal locks. atfork is an optional external package; when it is missing
# the process only aborts if the AUTOSERV/require_atfork_module setting asks
# for it.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Hint goes to stderr, the import error itself to stdout (same
        # streams and text as before).
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stdout.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000062
Kevin Cheng9b6930f2016-07-20 14:57:15 -070063from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000064from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000065from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070066from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070067from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070068from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070069from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070070from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070071from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070072from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000073from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000074
Paul Hobbs20cc72a2016-08-30 16:57:05 -070075
Dan Shicf4d2032015-03-12 15:04:21 -070076# Control segment to stage server-side package.
77STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
78 'stage_server_side_package')
79
Dan Shia06f3e22015-09-03 16:15:15 -070080# Command line to start servod in a moblab.
81START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
82STOP_SERVOD_CMD = 'sudo stop servod'
83
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the signal that was received (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits the process, so the message must not claim that the
    # signal is being ignored.
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080087
Dan Shicf4d2032015-03-12 15:04:21 -070088
89def _get_machines(parser):
90 """Get a list of machine names from command line arg -m or a file.
91
92 @param parser: Parser for the command line arguments.
93
94 @return: A list of machine names from command line arg -m or the
95 machines file specified in the command line arg -M.
96 """
97 if parser.options.machines:
98 machines = parser.options.machines.replace(',', ' ').strip().split()
99 else:
100 machines = []
101 machines_file = parser.options.machines_file
102 if machines_file:
103 machines = []
104 for m in open(machines_file, 'r').readlines():
105 # remove comments, spaces
106 m = re.sub('#.*', '', m).strip()
107 if m:
108 machines.append(m)
109 logging.debug('Read list of machines from file: %s', machines_file)
110 logging.debug('Machines: %s', ','.join(machines))
111
112 if machines:
113 for machine in machines:
114 if not machine or re.search('\s', machine):
115 parser.parser.error("Invalid machine: %s" % str(machine))
116 machines = list(set(machines))
117 machines.sort()
118 return machines
119
120
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv run.

    Executes the stage_server_side_package control segment with the machines
    and the build taken from the command line. How staging actually happens
    depends on the host type; at the moment only CrosHost defines
    stage_server_side_package. The control segment yields None for the url
    when no server-side package is available, but it may raise if staging
    failed for a reason other than the artifact (the server-side package) not
    being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    hosts = _get_machines(parser)
    lab = parser.options.lab
    if lab:
        hosts = server_job.get_machine_dicts(hosts, lab)

    # Server-side test code comes from the build given by test_source_build;
    # without it, fall back to the build specified in --image.
    build = parser.options.test_source_build or parser.options.image

    segment_globals = {'machines': hosts, 'image': build}
    segment_results = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_results)
    return segment_results['ssp_url'], segment_results['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700153
154
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, re-runs the current autoserv command line inside
    it (with host paths mapped to container paths), reports the outcome and
    destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                                            stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Whatever container setup or the run inside the
                       container raised; a FAIL entry is recorded on the job
                       first where appropriate.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop the flag together with the parent job id that follows it. A
        # slice does not fail if the flag happens to be the last argument.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # NOTE(review): only arguments containing a space are quoted, and only
    # with single quotes; arguments with other shell metacharacters are passed
    # through as they are.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # The lookup must not raise on its own (results may be None, the debug
        # folder may not exist): a second exception here would replace the
        # one being handled, so check instead of try/except.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                    fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting the result fails.
            test_container.destroy()
253
254
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    Tests that run with server-side packaging leave the results folder owned
    by root. The owner and group are changed (recursively, through sudo) to
    those of the current process so that the parsing job can read the results.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    @raises error.CmdError: If one of the sudo commands fails. The failure is
                            posted to the metadata db before re-raising.
    """
    if results:
        try:
            # Owner first, then group; each id is looked up right before its
            # command runs.
            for cmd_fmt, current_id in (
                    ('sudo -n chown -R %s "%s"', os.getuid),
                    ('sudo -n chgrp -R %s "%s"', os.getgid)):
                utils.run(cmd_fmt % (current_id(), results))
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700280
281
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: failures are logged and the function returns without
    raising for the error cases it handles.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.warning('Starting servod is aborted. Host %s is not found '
                            'in AFE.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ('localhost', '127.0.0.1'):
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Turn that into the comma-separated
        # list ps expects, so the trailing newline or a second pid never ends
        # up in the shell command as is.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
328
329
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds the server job from the command line options, runs the requested
    special task or the control file, and finally exits the process.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always; the exit status is 0 on success and 1 if
                        running the job raised anything.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in a '
                        'drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def _correct_results_permission_best_effort():
        """Fix results ownership without letting a failure escape.

        Used from the SIGTERM handler only: an exception there would skip
        destroying the container and killing the process group.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                _correct_results_permission_best_effort()

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged before
            # the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except:
                    metadata['success'] = False
                    metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.exception('Failed to destroy container %s.',
                                      container_name)
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    _correct_results_permission_best_effort()
        finally:
            # Whatever happened above, the process group must go away.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    try:
        with site_utils.SetupTsMonGlobalState('autoserv', indirect=True,
                                              short_lived=True):
            try:
                if repair:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    job.repair(job_labels)
                elif verify:
                    job.verify(job_labels)
                elif provision:
                    job.provision(job_labels)
                elif reset:
                    job.reset(job_labels)
                elif cleanup:
                    job.cleanup(job_labels)
                else:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    if use_ssp:
                        try:
                            _run_with_ssp(job, container_name, job_or_task_id,
                                          results, parser, ssp_url, job_folder,
                                          machines)
                        finally:
                            # Update the ownership of files in result folder.
                            correct_results_folder_permission(results)
                    else:
                        if collect_crashinfo:
                            # Update the ownership of files in result folder. If
                            # the job to collect crashinfo was running inside
                            # container (SSP) and crashed before correcting
                            # folder permission, the result folder might have
                            # wrong permission setting.
                            try:
                                correct_results_folder_permission(results)
                            except:
                                # Ignore any error as the user may not have root
                                # permission to run sudo command.
                                pass
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
            finally:
                while job.hosts:
                    host = job.hosts.pop()
                    host.close()
    except:
        # Deliberately catches everything: any failure becomes exit code 1 and
        # the pid file below is still written.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000582
583
def record_autoserv(options, duration_secs):
    """Report how long this autoserv run took to the metadata db.

    Nothing is recorded when more than one machine was given with -m (the
    atomic group case). A run without any machine is recorded under the
    host name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []

    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if len(machines) == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    statuses = job_overhead.STATUS
    status_by_task = {
            'reset': statuses.RESETTING,
            'verify': statuses.VERIFYING,
            'provision': statuses.PROVISIONING,
            'repair': statuses.REPAIRING,
            'cleanup': statuses.CLEANING,
            'collect_crashinfo': statuses.GATHERING,
    }
    # Only options that compare equal to True select a special task.
    requested = [task for task in status_by_task
                 if getattr(options, task, False) == True]
    if requested:
        status = status_by_task[requested[0]]
    else:
        status = statuses.RUNNING
    is_special_task = status not in [statuses.RUNNING, statuses.GATHERING]

    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
614
615
def main():
    """Entry point of autoserv: set up results/logging and run the job.

    Parses the command line, optionally stages the server-side package (SSP),
    prepares the results directory and logging, then either mocks the results
    (testing mode) or hands over to run_autoserv(). Before exiting, the pid
    file (if any) is closed with the exit code and the total duration is
    recorded through record_autoserv().

    Exits with status 1 when no arguments are given, when the results
    directory already exists unexpectedly, or when an existing results
    directory was requested but not found. Otherwise exits with the exit code
    of the run (1 if the run raised an unexpected exception).
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage
    # server-side package first. If that fails with error that autotest server
    # package does not exist, fall back to run the job without using
    # server-side packaging. If option warn_no_ssp is specified, that means
    # autoserv is running in a drone that does not support SSP, thus no need
    # to stage server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    # Bound up front so the "use existing results" check further down never
    # hits an unbound name when logging (and thus the results dir) is
    # disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = any(
            os.path.exists(os.path.join(results, filename))
            for filename in ('control.srv', 'status.log',
                             '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
        server_logging_config.ServerLoggingConfig(),
        results_dir=log_dir,
        use_console=not parser.options.no_tee,
        verbose=parser.options.verbose,
        no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
            'Autoserv is required to run with server-side packaging. '
            'However, no server-side package can be found based on '
            '`--image`, host attribute job_repo_url or host OS version '
            'label. It could be that the build to test is older than the '
            'minimum version that supports server-side packaging. The test '
            'will be executed without using server-side packaging. '
            'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

    # wait until now to perform this check, so it get properly logged
    if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file
    # argument, no machine, or the control file cannot provide a name, so the
    # testing-mode branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')
            test_name = None

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
        'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
        'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                        test_name if test_name else 'unknown-test',
                        parser.options.results, machine
                    ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000786
mblighbb421852008-03-11 22:36:16 +0000787
# Run autoserv only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()