blob: 54760c3db43a130f65dcd51140a01d73a69c90ca [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
30from chromite.lib import metrics
31
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080032try:
33 from autotest_lib.puppylab import results_mocker
34except ImportError:
35 results_mocker = None
36
# Shorthand for the global autotest configuration object.
_CONFIG = global_config.global_config

require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)

# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

# Patch os.fork and the logging module for threading+fork safety. The order
# of the statements in the try body matters: each import may raise
# ImportError, which aborts the remaining patching steps.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # NOTE(review): this lookup defaults to False while require_atfork above
    # defaults to True for the same config key - confirm which is intended.
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stdout.write('%s\n' % (e,))
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000062
Kevin Cheng9b6930f2016-07-20 14:57:15 -070063from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000064from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000065from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070066from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070067from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070068from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070069from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070070from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070071from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070072from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000073from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000074
Paul Hobbs20cc72a2016-08-30 16:57:05 -070075
# Path of the control segment that stages the server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = (
        server_job._control_segment_path('stage_server_side_package'))

# Command lines to start (needs board and port) and stop servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
83
def log_alarm(signum, frame):
    """Signal handler that reports a SIGALRM and terminates autoserv.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raise SystemExit: always, with exit status 1.
    """
    # Log before exiting so the abort can be attributed to the alarm. The
    # message states what the handler really does: it exits, not ignores.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080087
Dan Shicf4d2032015-03-12 15:04:21 -070088
89def _get_machines(parser):
90 """Get a list of machine names from command line arg -m or a file.
91
92 @param parser: Parser for the command line arguments.
93
94 @return: A list of machine names from command line arg -m or the
95 machines file specified in the command line arg -M.
96 """
97 if parser.options.machines:
98 machines = parser.options.machines.replace(',', ' ').strip().split()
99 else:
100 machines = []
101 machines_file = parser.options.machines_file
102 if machines_file:
103 machines = []
104 for m in open(machines_file, 'r').readlines():
105 # remove comments, spaces
106 m = re.sub('#.*', '', m).strip()
107 if m:
108 machines.append(m)
109 logging.debug('Read list of machines from file: %s', machines_file)
110 logging.debug('Machines: %s', ','.join(machines))
111
112 if machines:
113 for machine in machines:
114 if not machine or re.search('\s', machine):
115 parser.parser.error("Invalid machine: %s" % str(machine))
116 machines = list(set(machines))
117 machines.sort()
118 return machines
119
120
def _stage_ssp(parser):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the machines
    and image taken from the autoserv command line. How staging is actually
    done is up to that control segment and may differ for each host type.
    The segment is expected to leave `ssp_url` and `error_msg` in its local
    namespace; exceptions it raises are not caught here.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    options = parser.options
    machine_dicts = server_job.get_machine_dicts(
            _get_machines(parser), options.lab, options.host_attributes)

    # Without test_source_build, the server-side test code comes from the
    # build specified in --image.
    image = options.test_source_build or options.image
    script_globals = {'machines': machine_dicts, 'image': image}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)
    return script_locals['ssp_url'], script_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700152
153
def _container_run_left_logs(results):
    """Tell whether the results folder has anything in its debug subfolder.

    @param results: Path to the results folder; may be None.

    @return: True if <results>/debug can be listed and is not empty, False
             otherwise (including when results is None or the folder is
             missing).
    """
    if not results:
        return False
    try:
        return bool(os.listdir(os.path.join(results, 'debug')))
    except OSError:
        return False


def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, re-runs the current autoserv command line
    inside it with host paths mapped to container paths, reports the outcome
    and destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: whatever container setup or the run inside the
                      container raised; a FAIL entry is recorded on the job
                      before it is re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop the flag together with the job id value that follows it.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # Only arguments containing a space are quoted.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. The check
        # lives in a helper so that a missing debug folder (or results being
        # None) cannot raise here and replace the exception re-raised below.
        if not _container_run_left_logs(results):
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting the outcome fails.
            test_container.destroy()
252
253
def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    @raise error.CmdError: if chown or chgrp fails; the failure is posted to
                           the metadata db before being re-raised.
    """
    if not results:
        return

    ownership_cmds = (
            'sudo -n chown -R %s "%s"' % (os.getuid(), results),
            'sudo -n chgrp -R %s "%s"' % (os.getgid(), results),
    )
    try:
        for cmd in ownership_cmds:
            utils.run(cmd)
    except error.CmdError as e:
        failure = {'error': str(e),
                   'result_folder': results,
                   'drone': socket.gethostname()}
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure)
        raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700279
280
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: every failure is logged and the function returns
    without raising for the error cases it knows about.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s '
                            'servo_host attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
327
328
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers), builds a
    server job from the parsed options, runs the requested special task or
    control file, and exits the process with the resulting exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: once the job has been run; the code is 1 if running
                       the job raised, 0 otherwise.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no drone is found to support server-side '
                'packaging. The test will be executed in a drone without '
                'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the container and kill the group."""
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                correct_results_folder_permission(results)

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged
            # before the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except Exception:
                    metadata['success'] = False
                    metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.exception('Failed to destroy container %s.',
                                      container_name)
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    correct_results_folder_permission(results)
        finally:
            # Whatever went wrong during the cleanup above, the process group
            # must still be killed so that SIGTERM always aborts autoserv.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except Exception:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command. SystemExit and
                            # KeyboardInterrupt are deliberately not
                            # swallowed here.
                            logging.debug('Ignoring failure to correct the '
                                          'results folder permission.',
                                          exc_info=True)
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catches everything, including SystemExit, so that the
        # pid file below is always closed with a failing exit code.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000584
585
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. With no machine
    at all, the duration is recorded against the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []

    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Only options that compare equal to True count as a selected task.
    match = [task for task in task_mapping
             if getattr(options, task, False) == True]
    if match:
        status = task_mapping[match[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
616
617
def main():
    """Parse the command line, prepare results/logging and run autoserv.

    Steps, in order: parse arguments, optionally stage the server-side
    package (SSP), create the results directory, configure logging, open the
    pid file, then either mock results (testing mode) or call run_autoserv().

    The function normally ends the process through sys.exit(): with status 1
    when no arguments are given or the results directory check fails,
    otherwise with the exit code collected from the run (1 when an unexpected
    exception was raised). The autoserv duration is recorded through
    record_autoserv() right before the final exit.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage the
    # server-side package first. If that fails with an error that the autotest
    # server package does not exist, fall back to running the job without
    # server-side packaging. If option warn_no_ssp is specified, autoserv is
    # running in a drone that does not support SSP, thus there is no need to
    # stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    # Bound up front: the use_existing_results check further down reads this
    # even when --no_logging skipped the results directory inspection.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info('Results placed in %s', results)

        # wait until now to perform this check, so it get properly logged
    if (parser.options.use_existing_results and not resultdir_exists and
        not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file,
    # no machine, or the control file cannot be parsed; it is read later in
    # testing mode, so it must always be bound.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
        parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')
            test_name = None

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs and exits the process.
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
        sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000788
mblighbb421852008-03-11 22:36:16 +0000789
if __name__ == '__main__':
    # Run autoserv only when this file is executed as a script, not when it
    # is imported as a module.
    main()