blob: a6c442244e2a29c7d6443e65986c973aa2a07000 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Dan Shiffd5b822017-07-14 11:16:23 -070029from autotest_lib.server import site_utils
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080030
Dan Shi5e2efb72017-02-07 11:40:23 -080031try:
32 from chromite.lib import metrics
33except ImportError:
34 metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080035
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080036try:
37 from autotest_lib.puppylab import results_mocker
38except ImportError:
39 results_mocker = None
40
# Module-level shorthand for the shared global_config object; read below via
# _CONFIG.get_config_value(...).
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070046
mbligh9ff89cd2009-09-03 20:28:17 +000047
Kevin Cheng9b6930f2016-07-20 14:57:15 -070048from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000049from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000050from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070051from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070052from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070053from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070054from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070055from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070056from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070057from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000058from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000059
Paul Hobbs20cc72a2016-08-30 16:57:05 -070060
# Path of the control segment that stages the server-side package. It is
# executed by _stage_ssp via execfile().
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two %s placeholders are
# filled with (board, servo_port) by _start_servod.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod.
STOP_SERVOD_CMD = 'sudo stop servod'
68
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event, then exit with status 1.

    @param signum: Number of the delivered signal (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raise SystemExit: Always raised, with exit code 1.
    """
    # The handler terminates the process, so the message must say so;
    # claiming the signal is ignored would mislead anyone reading the logs.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080072
Dan Shicf4d2032015-03-12 15:04:21 -070073
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    When a machines file is given it replaces whatever was passed with -m.
    In the file, everything after a '#' on a line is treated as a comment and
    blank lines are skipped.

    @param parser: Parser for the command line arguments.

    @return: A sorted list of unique machine names from command line arg -m
             or the machines file specified in the command line arg -M.
             Empty if neither was given.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []

    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(machines_file, 'r') as f:
            for line in f:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    for machine in machines:
        # A name containing whitespace can only come from the machines file
        # (names from -m are split on whitespace) and is never a valid host.
        if not machine or re.search(r'\s', machine):
            parser.parser.error("Invalid machine: %s" % str(machine))
    return sorted(set(machines))
104
105
def _stage_ssp(parser):
    """Stage server-side package.

    Executes the stage-server-side-package control segment with the machines
    and build taken from the autoserv command line. How staging is actually
    done may differ per host type; currently only CrosHost defines
    stage_server_side_package.
    The segment yields None for the url when no server-side package is
    available, but it may raise if staging failed for a reason other than the
    artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    hostnames = _get_machines(parser)
    machine_dicts = server_job.get_machine_dicts(
            hostnames, parser.options.lab, parser.options.host_attributes)

    # Prefer options.test_source_build for the server-side test code; when it
    # is not specified, fall back to the build given by options.image.
    source_build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': machine_dicts, 'image': source_build}

    # The control segment leaves its outputs in the locals dict.
    staging_output = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             staging_output)
    ssp_url = staging_output['ssp_url']
    error_msg = staging_output['error_msg']
    return ssp_url, error_msg
Dan Shicf4d2032015-03-12 15:04:21 -0700137
138
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    its paths point inside the container, runs that command in the container
    and finally destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the run inside the container
                      raised; a FAIL entry is recorded on the job first.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the
        # removal above it sits at the same index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # The debug folder may not exist at all (or results may be None), so
        # check before listing it. A nested try/except is avoided on purpose:
        # in Python 2 it would change what the bare raise below re-raises.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_logs = bool(debug_dir and os.path.isdir(debug_dir) and
                              os.listdir(debug_dir))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even if reporting above failed, so a
            # metrics or upload error cannot leak a container.
            test_container.destroy()
237
238
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    Tests run with server-side packaging leave the results folder owned by
    root. The owner and group are switched to those of the autoserv process
    so that the parsing job can read the folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    @raise error.CmdError: If one of the sudo commands failed; the failure is
                           posted to the metadata db before re-raising.
    """
    if results:
        chown_cmd = 'sudo -n chown -R %s "%s"'
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"'
        try:
            utils.run(chown_cmd % (os.getuid(), results))
            utils.run(chgrp_cmd % (os.getgid(), results))
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700264
265
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Does nothing outside moblab, when the dut's servo_host attribute is not
    local, or when the AFE cannot be reached. Failures to start servod are
    logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warn('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        # Use the shared constant rather than repeating the command string.
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
312
313
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds a server_job from the parsed options, runs either the requested
    special task or the control file, and then exits the process through
    sys.exit() with the resulting exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the SSP container and kill the group.

        @param signum: Number of the delivered signal (unused).
        @param frame: Stack frame that was interrupted (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        # NOTE(review): correct_results_folder_permission re-raises
        # error.CmdError; if that happens here the handler is left before the
        # container is destroyed and before killpg below - confirm intended.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            # Assume success; flipped to False below if the container is
            # missing or destroying it fails.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill the whole process group, including this process.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the individual options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Without an explicit execution tag, the parse_job value is used as tag.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    # Special tasks do not need a control file argument.
    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional keyword arguments for server_job; some are only passed when
    # the corresponding option was given.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    # Outer try: turn any failure into exit code 1 and always flush metrics.
    # Inner try: always close the job, whatever task ran.
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    # 'success' starts as False and is only set to True
                    # after job.run returns without raising.
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                site_utils.collect_result_sizes(results)
    except:
        # Any failure is reported as exit code 1 with the traceback printed.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000578
579
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given. A run without
    any machine is recorded under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if len(machines) == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Names of the task options that are set to True on this run.
    match = [task for task in task_mapping
             if getattr(options, task, False) == True]
    if match:
        status = task_mapping[match[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    hostname = machines[0]
    job_overhead.record_state_duration(
            job_or_task_id, hostname, status, duration_secs,
            is_special_task=is_special_task)
610
611
def main():
    """Entry point: parse arguments, set up results/logging, run autoserv.

    The process is terminated through sys.exit(): with status 1 when no
    arguments are given or when the results directory check fails, otherwise
    with the exit code produced by the job. In testing mode no job is run;
    results are mocked for non-task invocations and the function returns.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as a leftover from a
        # previous job.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails with an error that the
    # autotest server package does not exist, fall back to running the job
    # without server-side packaging. If option warn_no_ssp is specified,
    # autoserv is running on a drone that does not support SSP, so there is
    # no need to stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have an autotest server package. Fall back to
        # not using server-side packaging. Logging is postponed until logging
        # has been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, autoserv is running on a
    # drone that does not support SSP and a warning will be logged.
    # Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Default to None so the testing-mode path below can always read
    # test_name, even when no control file is parsed (for example when no
    # machines are given).
    test_name = None
    # Take the first argument as control file name, get the test name from
    # the control file.
    if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000782
mblighbb421852008-03-11 22:36:16 +0000783
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()