blob: 39d7555147d89bba874cddf56ea1cc136b06d6bf [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070027from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.client.common_lib import global_config
Allen Lif146e872017-08-15 18:24:31 -070029from autotest_lib.server import results_mocker
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080030
try:
    from chromite.lib import metrics
    from chromite.lib import cloud_trace
except ImportError:
    # chromite is optional; fall back to mock stand-ins so the rest of this
    # module can reference `metrics` and `cloud_trace` unconditionally.
    # `utils` has to be imported right here: the module-level import of
    # autotest_lib.server.utils only happens further down the file, so using
    # the name at this point would otherwise raise NameError.
    from autotest_lib.server import utils
    metrics = utils.metrics_mock
    import mock
    cloud_trace = mock.MagicMock()
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080038
# Shared global_config object. run_autoserv() reads the AUTOSERV /
# auto_start_servod setting from it.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070044
mbligh9ff89cd2009-09-03 20:28:17 +000045
Kevin Cheng9b6930f2016-07-20 14:57:15 -070046from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000047from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000048from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070049from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070050from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070051from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070052from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070053from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070054from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070055from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000056from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000057
Paul Hobbs20cc72a2016-08-30 16:57:05 -070058
# Control segment to stage server-side package. _stage_ssp() executes this
# file and reads `ssp_url` and `error_msg` back from it.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. Formatted with (board, port).
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop a running servod.
STOP_SERVOD_CMD = 'sudo stop servod'
66
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Signal number passed by the signal module (unused).
    @param frame: Interrupted stack frame passed by the signal module
            (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits the process, so the message says so. The previous
    # text claimed the signal was ignored, which contradicted the exit below.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080070
Dan Shicf4d2032015-03-12 15:04:21 -070071
72def _get_machines(parser):
73 """Get a list of machine names from command line arg -m or a file.
74
75 @param parser: Parser for the command line arguments.
76
77 @return: A list of machine names from command line arg -m or the
78 machines file specified in the command line arg -M.
79 """
80 if parser.options.machines:
81 machines = parser.options.machines.replace(',', ' ').strip().split()
82 else:
83 machines = []
84 machines_file = parser.options.machines_file
85 if machines_file:
86 machines = []
87 for m in open(machines_file, 'r').readlines():
88 # remove comments, spaces
89 m = re.sub('#.*', '', m).strip()
90 if m:
91 machines.append(m)
92 logging.debug('Read list of machines from file: %s', machines_file)
93 logging.debug('Machines: %s', ','.join(machines))
94
95 if machines:
96 for machine in machines:
97 if not machine or re.search('\s', machine):
98 parser.parser.error("Invalid machine: %s" % str(machine))
99 machines = list(set(machines))
100 machines.sort()
101 return machines
102
103
def _stage_ssp(parser, resultsdir):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the job's
    machines and image, and hands back what the segment produced. How the
    package actually gets staged depends on the host type and is decided
    inside that control segment, not here.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            for autoserv run requires no logging.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    opts = parser.options
    machine_dicts = server_job.get_machine_dicts(
            _get_machines(parser), resultsdir, opts.lab, opts.host_attributes)

    # Server-side test code comes from test_source_build when it is given,
    # otherwise from the build named by --image.
    image = opts.test_source_build or opts.image
    control_globals = {'machines': machine_dicts, 'image': image}
    control_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, control_globals,
             control_locals)
    return control_locals['ssp_url'], control_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700140
141
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites this process's own command line so it
    is valid inside the container, runs it there, and always destroys the
    container afterwards.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Whatever container setup or the in-container run
            raised; a FAIL entry is recorded on the job before re-raising.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the
        # flag is removed, its value sits at the same index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        #
        # The debug folder may not exist at all (that is the "no log" case),
        # and results may be None. Check explicitly rather than calling
        # os.listdir blindly: an error raised here would replace the original
        # failure and skip the status.log entry. A nested try/except is
        # avoided on purpose, because in Python 2 it would change what the
        # bare `raise` below re-raises.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_logs = bool(debug_dir and os.path.isdir(debug_dir) and
                              os.listdir(debug_dir))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
        finally:
            # The container must be torn down even if reporting the metric
            # fails.
            test_container.destroy()
233
234
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. Ownership is changed recursively (through sudo) to the
    uid/gid of the autoserv process so the parsing job can access the
    results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
            empty or None.

    """
    if results:
        owner = os.getuid()
        group = os.getgid()
        utils.run('sudo -n chown -R %s "%s"' % (owner, results))
        utils.run('sudo -n chgrp -R %s "%s"' % (group, results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700252
253
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Does nothing outside moblab, when the AFE cannot be queried, when the
    host is unknown to the AFE, or when the dut's servo_host attribute is not
    local. Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read;
            # indexing hosts[0] would raise an uncaught IndexError.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        attributes = hosts[0].attributes
        servo_host = attributes.get('servo_host', None)
        servo_port = attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Join them with commas so the ps
        # call stays a single well-formed command even if several servod
        # processes are found.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
300
301
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds a server_job from the parsed options, runs either one special
    task (repair/verify/provision/reset/cleanup) or the control file, and
    finally exits the process. This function does not return normally: it
    ends with sys.exit(exit_code), where exit_code is 1 if anything raised
    while the job was running and 0 otherwise.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Closes the pid file with exit code 1, fixes result ownership,
        # destroys the SSP container if one is in use, then SIGKILLs the
        # whole process group (this process included).
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        # Drop the recorded ImportError (Python 2 only API).
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the individual options into locals. Several of them are read here
    # but not used further down in this function.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # The execution tag falls back to the parse_job value when not given.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional server_job keyword arguments; entries are only added when the
    # corresponding option was given.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     auto_flush=False, short_lived=True)
    # Outer try: turns any failure into exit_code 1 and always flushes
    # metrics. Inner try: always closes the job and, for special tasks,
    # builds the result summary.
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    # 'success' starts out False and is only flipped to True
                    # after job.run() returns without raising.
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Deliberately broad: whatever went wrong, report it and exit with 1.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000563
564
def record_autoserv(options, start_time):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given. With no
    machine at all the duration is recorded against the name 'hostless'.

    @param options: parser options.
    @param start_time: When autoserv started (a datetime.datetime).
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    status = get_job_status(options)
    regular_job_statuses = [
            job_overhead.STATUS.RUNNING, job_overhead.STATUS.GATHERING]
    special = status not in regular_job_statuses
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    elapsed = datetime.datetime.now() - start_time
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, elapsed.total_seconds(),
            is_special_task=special)
590
591
def get_job_status(options):
    """Returns the HQE Status for this run.

    The first task flag set on options decides the status. Flags are checked
    in the same order run_autoserv dispatches them (repair, verify,
    provision, reset, cleanup, then collect_crashinfo), so the reported
    status does not depend on dict iteration order if more than one flag
    happens to be set.

    @param options: parser options.

    @return: The job_overhead.STATUS value for the first task flag that is
             set, or STATUS.RUNNING when none is set.
    """
    s = job_overhead.STATUS
    # Ordered (option name, status) pairs; order matters, see docstring.
    task_mapping = (
            ('repair', s.REPAIRING),
            ('verify', s.VERIFYING),
            ('provision', s.PROVISIONING),
            ('reset', s.RESETTING),
            ('cleanup', s.CLEANING),
            ('collect_crashinfo', s.GATHERING),
    )
    for task, status in task_mapping:
        if getattr(options, task, False):
            return status
    return s.RUNNING
Fang Deng042c1472014-10-23 13:56:41 -0700604
605
def main():
    """Entry point: parse arguments, prepare results/logging, run autoserv.

    Steps, in order:
      * parse the command line (print help and exit 1 if no arguments);
      * pick and create the results directory unless logging is disabled;
      * optionally stage the server-side package (SSP);
      * configure logging and the pid file;
      * either mock results (testing mode) or call run_autoserv().

    In the normal path the process is terminated with sys.exit(exit_code)
    after the pid file has been closed and the run duration recorded. In
    testing mode the function returns after the same cleanup has run.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Initialised up front so that the use_existing_results check further
    # down never reads an unbound name when logging is disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job requires to run with server-side package, try to stage the
    # server-side package first. If that fails with an error that the autotest
    # server package does not exist, fall back to running the job without
    # server-side packaging. If option warn_no_ssp is specified, autoserv is
    # running in a drone that does not support SSP, thus there is no need to
    # stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
    if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared against the literal string 'True'.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        record_autoserv(parser.options, start_time)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000771
mblighbb421852008-03-11 22:36:16 +0000772
# Run autoserv only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()