blob: df16e6ab6c82e1e4d6c9ae7de82626d95505c713 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070027from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.client.common_lib import global_config
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
try:
    from chromite.lib import metrics
except ImportError:
    # `utils` is only bound by the server imports further down this file, so
    # referencing it here would raise NameError on the fallback path. Import
    # it locally; the later top-level import simply rebinds the same module.
    from autotest_lib.server import utils
    metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080035try:
36 from autotest_lib.puppylab import results_mocker
37except ImportError:
38 results_mocker = None
39
Dan Shia06f3e22015-09-03 16:15:15 -070040_CONFIG = global_config.global_config
41
Dan Shia1ecd5c2013-06-06 11:21:31 -070042
Jakob Jueliche497b552014-09-23 19:11:59 -070043# Number of seconds to wait before returning if testing mode is enabled
Prashanth B6285f6a2014-05-08 18:01:27 -070044TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070045
mbligh9ff89cd2009-09-03 20:28:17 +000046
Kevin Cheng9b6930f2016-07-20 14:57:15 -070047from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000048from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000049from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070050from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070051from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070052from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070053from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070054from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070055from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070056from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000057from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000058
Paul Hobbs20cc72a2016-08-30 16:57:05 -070059
Dan Shicf4d2032015-03-12 15:04:21 -070060# Control segment to stage server-side package.
61STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
62 'stage_server_side_package')
63
Dan Shia06f3e22015-09-03 16:15:15 -070064# Command line to start servod in a moblab.
65START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
66STOP_SERVOD_CMD = 'sudo stop servod'
67
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and exit with status 1.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raise SystemExit: Always, with exit code 1.
    """
    # The handler terminates the process, so the message must not claim the
    # signal is being ignored.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080071
Dan Shicf4d2032015-03-12 15:04:21 -070072
73def _get_machines(parser):
74 """Get a list of machine names from command line arg -m or a file.
75
76 @param parser: Parser for the command line arguments.
77
78 @return: A list of machine names from command line arg -m or the
79 machines file specified in the command line arg -M.
80 """
81 if parser.options.machines:
82 machines = parser.options.machines.replace(',', ' ').strip().split()
83 else:
84 machines = []
85 machines_file = parser.options.machines_file
86 if machines_file:
87 machines = []
88 for m in open(machines_file, 'r').readlines():
89 # remove comments, spaces
90 m = re.sub('#.*', '', m).strip()
91 if m:
92 machines.append(m)
93 logging.debug('Read list of machines from file: %s', machines_file)
94 logging.debug('Machines: %s', ','.join(machines))
95
96 if machines:
97 for machine in machines:
98 if not machine or re.search('\s', machine):
99 parser.parser.error("Invalid machine: %s" % str(machine))
100 machines = list(set(machines))
101 machines.sort()
102 return machines
103
104
def _stage_ssp(parser, resultsdir):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the machines
    and image taken from the command line, then reads the outcome from the
    variables the segment leaves behind. The detail implementation could be
    different for each host type.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            for autoserv run requires no logging.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    machine_dicts = server_job.get_machine_dicts(
            _get_machines(parser), resultsdir, parser.options.lab,
            parser.options.host_attributes)

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image

    segment_globals = {'machines': machine_dicts, 'image': source_build}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url'], segment_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700141
142
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    that it is valid inside the container, runs it there and finally destroys
    the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                                            be stored in a temp folder.
                    results                 can be None for autoserv run
                                            requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Any failure from container setup or from the run inside
                      the container is re-raised after being recorded on job.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg; after the flag
        # is removed the value sits at the same index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. results may
        # be None and the debug folder may not exist; both count as "no log"
        # so that the lookup itself cannot raise and mask the real failure.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
        finally:
            # Destroy the container even when reporting the metric fails.
            test_container.destroy()
234
235
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    """
    if results:
        # Owner first, then group; each id is looked up right before its
        # command is run.
        ownership_commands = (
            ('sudo -n chown -R %s "%s"', os.getuid),
            ('sudo -n chgrp -R %s "%s"', os.getgid),
        )
        for command_template, current_id in ownership_commands:
            utils.run(command_template % (current_id(), results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700253
254
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Does nothing outside moblab, when the AFE cannot be queried, when the host
    is unknown to the AFE, or when the dut's servo_host attribute is not a
    local address. Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Avoid an IndexError below when the AFE does not know the host.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line; join them into the comma-separated
        # list accepted by `ps -p` so no newline ends up in the command line.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
301
302
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, installs the signal handlers, builds the server job from
    the parsed command line, runs the requested special task or control file
    and exits the process with the resulting exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: Always; the code is 0 on success and 1 when the job
                       raised.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def correct_permission_best_effort():
        """Fix ownership of the results folder without ever raising."""
        try:
            correct_results_folder_permission(results)
        except Exception:
            # The user may not have permission to run the sudo command.
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Clean up and kill the whole process group on SIGTERM."""
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                correct_permission_best_effort()

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged before
            # the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        logging.debug('Container %s is not found.',
                                      container_name)
                except:
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.exception('Failed to destroy container %s.',
                                      container_name)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    correct_permission_best_effort()
        except Exception:
            logging.exception('Unexpected error while handling SIGTERM.')
        finally:
            # Whatever happened above, the process group must be killed.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except Exception:
                            # Best effort only: the user may not have root
                            # permission to run sudo command.
                            logging.debug('Ignoring failure to correct '
                                          'permission of results folder %s.',
                                          results, exc_info=True)
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Deliberately catches everything (including SystemExit) so the pid
        # file below is always closed with the failing exit code.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000564
565
def record_autoserv(options, duration_secs):
    """Report how long this autoserv run took via job_overhead.

    Nothing is recorded when more than one machine is given. A run without
    any machine is reported under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = []
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()

    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    enabled_tasks = [task for task in task_mapping
                     if getattr(options, task, False) == True]
    if enabled_tasks:
        status = task_mapping[enabled_tasks[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]

    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
596
597
def main():
    """Parse the autoserv command line and run the requested job or task.

    The function performs these steps, in order:
      1. Parse arguments; print help and exit with status 1 when autoserv is
         invoked without any argument.
      2. Resolve (and create, if needed) the results directory, unless
         logging is disabled with the no_logging option.
      3. Try to stage the server-side package (SSP) when it is required.
      4. Configure logging, then emit the messages that had to be postponed
         until logging was available.
      5. Optionally open the pid file.
      6. Either mock the results (testing mode) or call run_autoserv().
      7. Close the pid file, record the autoserv duration and exit.

    The process exits through sys.exit() with the exit code of the run; an
    uncaught exception from the run is reported as exit code 1.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    options = parser.options

    # Bound up front so the use_existing_results check further down never
    # hits an unbound name when logging (and thus the results directory) is
    # disabled.
    resultdir_exists = False
    if options.no_logging:
        results = None
    else:
        results = options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks a directory left over from a previous job.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job requires to run with server-side package, try to stage
    # server-side package first. If that fails with error that autotest server
    # package does not exist, fall back to run the job without using
    # server-side packaging. If option warn_no_ssp is specified, that means
    # autoserv is running in a drone that does not support SSP, thus no need
    # to stage server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    ssp_requested = not options.warn_no_ssp and options.require_ssp
    if ssp_requested:
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = ssp_requested and ssp_url
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not options.no_tee,
            verbose=options.verbose,
            no_console_prefix=options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file or
    # no machine, or when the name cannot be parsed, so the testing-mode
    # branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any([ex in options.label for ex in testing_exceptions]))
    is_task = (options.verify or options.repair or
               options.provision or options.reset or
               options.cleanup or options.collect_crashinfo)
    try:
        if test_mode:
            # The parser doesn't run on tasks anyway, so we can just return
            # happy signals without faking results.
            if not is_task:
                machine = options.results.split('/')[-1]

                # TODO(beeps): The proper way to do this would be to
                # refactor job creation so we can invoke job.record
                # directly. To do that one needs to pipe the test_name
                # through run_autoserv and bail just before invoking
                # the server job. See the comment in
                # puppylab/results_mocker for more context.
                results_mocker.ResultsMocker(
                        test_name if test_name else 'unknown-test',
                        options.results, machine
                        ).mock_results()
            return
        else:
            run_autoserv(pid_file_manager, results, parser, ssp_url,
                         use_ssp)
    except SystemExit as e:
        exit_code = e.code
        if exit_code:
            logging.exception('Uncaught SystemExit with code %s', exit_code)
    except Exception:
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        logging.exception('Uncaught Exception, exit_code = 1.')
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000768
mblighbb421852008-03-11 22:36:16 +0000769
# Script entry point: run autoserv only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()