blob: 3025dbf6344b442b2901ac12fab76031a4c310f9 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070027from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.client.common_lib import global_config
Allen Lif146e872017-08-15 18:24:31 -070029from autotest_lib.server import results_mocker
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080030
Dan Shi5e2efb72017-02-07 11:40:23 -080031try:
32 from chromite.lib import metrics
Paul Hobbse9fd5572017-08-22 02:48:25 -070033 from chromite.lib import cloud_trace
Dan Shi5e2efb72017-02-07 11:40:23 -080034except ImportError:
Prathmesh Prabhud16c8012017-08-28 11:42:46 -070035 from autotest_lib.client.common_lib import utils as common_utils
36 metrics = common_utils.metrics_mock
Paul Hobbse9fd5572017-08-22 02:48:25 -070037 import mock
38 cloud_trace = mock.MagicMock()
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080039
Dan Shia06f3e22015-09-03 16:15:15 -070040_CONFIG = global_config.global_config
41
Dan Shia1ecd5c2013-06-06 11:21:31 -070042
Jakob Jueliche497b552014-09-23 19:11:59 -070043# Number of seconds to wait before returning if testing mode is enabled
Prashanth B6285f6a2014-05-08 18:01:27 -070044TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070045
mbligh9ff89cd2009-09-03 20:28:17 +000046
Kevin Cheng9b6930f2016-07-20 14:57:15 -070047from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000048from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000049from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070050from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070051from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070052from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070053from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070054from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070055from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070056from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000057from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000058
Paul Hobbs20cc72a2016-08-30 16:57:05 -070059
# Control segment to stage server-side package. It is executed by _stage_ssp,
# which reads `ssp_url` and `error_msg` from the names the segment defines.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start and stop servod in a moblab. START_SERVOD_CMD is
# formatted with a (board, port) tuple.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
67
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event loudly, then exit.

    The handler does not ignore the signal. It raises SystemExit(1) through
    sys.exit, so the log message states that autoserv is exiting.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).
    """
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080071
Dan Shicf4d2032015-03-12 15:04:21 -070072
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    When a machines file is given (-M) its content replaces whatever was
    passed with -m. Text after '#' on a line of the file is a comment and is
    dropped, as are blank lines.

    @param parser: Parser for the command line arguments.

    @return: A sorted list of unique machine names from command line arg -m
             or the machines file specified in the command line arg -M.
             parser.parser.error is called for every name that is empty or
             contains whitespace.
    """
    options = parser.options
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []

    machines_file = options.machines_file
    if machines_file:
        machines = []
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(machines_file, 'r') as machines_fd:
            for line in machines_fd:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and return the names in a deterministic order.
        machines = sorted(set(machines))
    return machines
103
104
def _stage_ssp(parser, resultsdir):
    """Stage the server-side package by executing its control segment.

    The control segment STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE is executed
    with the machine dicts and the image to use; it is expected to define
    `ssp_url` and `error_msg`, which are handed back to the caller.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. It is joined with
            parser.options.host_info_subdir to locate the host info store.

    @return: (ssp_url, error_msg), the values of the names `ssp_url` and
            `error_msg` defined by the control segment.
    """
    options = parser.options
    machine_dicts = server_job.get_machine_dicts(
            machine_names=_get_machines(parser),
            store_dir=os.path.join(resultsdir, options.host_info_subdir),
            in_lab=options.lab,
            use_shadow_store=not options.local_only_host_info,
            host_attributes=options.host_attributes,
    )

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    image = options.test_source_build or options.image
    segment_globals = {'machines': machine_dicts, 'image': image}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url'], segment_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700145
146
def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is
    rewritten so its paths point inside the container, and the command is
    run there. The container is destroyed afterwards, whether or not the
    run succeeded.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Any failure from setting up the container or running
                      the command inside it is re-raised after a FAIL entry
                      is recorded on the job (where applicable).
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # A missing debug folder counts as "no log". It is checked up front
        # rather than with a nested try/except so that a failing listdir can
        # never replace the exception re-raised by the bare `raise` below.
        debug_dir = os.path.join(results, 'debug')
        debug_files = os.listdir(debug_dir) if os.path.isdir(debug_dir) else []
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                        fields={'success': success})
        test_container.destroy()
238
239
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. Owner and group are changed recursively, through sudo, to
    the uid and gid of the current process so the parsing job can access the
    results folder. Nothing is done when no results folder is given.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder.

    """
    if results:
        chown_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(chown_cmd)
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(chgrp_cmd)
Dan Shi3f1b8a52015-04-21 11:11:06 -0700257
258
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of a moblab, when the AFE cannot be queried, when
    the AFE does not know the host, or when the dut's servo_host attribute is
    not local. Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warn('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
305
306
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Sets up the process (stdin, process group, signal handlers, $USER),
    builds the server job from the parsed options, runs the requested
    special task or the control file, and finally terminates the process
    with sys.exit. This function does not return.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def correct_permission_best_effort():
        """Fix results folder ownership without ever raising.

        Used by the SIGTERM handler only: a failure here (e.g. sudo is not
        permitted) must not stop the handler from destroying the container
        and killing the process group.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct the permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_permission_best_effort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_permission_best_effort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'parse_job': parse_job,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'test_retry': test_retry,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
    }
    # Optional arguments are only passed on when they were actually given.
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     auto_flush=False, short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Any failure of the job is reported through the exit code; the
        # traceback is printed for troubleshooting.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000585
586
def record_autoserv(options, start_time):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded against the host name 'hostless'.

    @param options: parser options.
    @param start_time: When autoserv started
    """
    # Get machine hostname
    hostnames = []
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    status = get_job_status(options)
    regular_job_statuses = [job_overhead.STATUS.RUNNING,
                            job_overhead.STATUS.GATHERING]
    special_task = status not in regular_job_statuses
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    elapsed = datetime.datetime.now() - start_time
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, elapsed.total_seconds(),
            is_special_task=special_task)
612
613
def get_job_status(options):
    """Map the special-task flag set on options to its HQE status.

    @param options: parser options.

    @returns: The status of the first special-task option found set on
              options, or STATUS.RUNNING when none of them is set.
    """
    statuses = job_overhead.STATUS
    status_by_task = {
        'reset': statuses.RESETTING, 'verify': statuses.VERIFYING,
        'provision': statuses.PROVISIONING, 'repair': statuses.REPAIRING,
        'cleanup': statuses.CLEANING,
        'collect_crashinfo': statuses.GATHERING}
    for task_name in status_by_task:
        # Options lacking the attribute are treated as "flag not set".
        if getattr(options, task_name, False):
            return status_by_task[task_name]
    return statuses.RUNNING
Fang Deng042c1472014-10-23 13:56:41 -0700626
627
def main():
    """Parse the command line, set up results/logging and run autoserv.

    Always terminates the process through sys.exit() unless an exception
    escapes: the exit code is 0 on success, the code carried by a SystemExit
    raised while running the job, or 1 for any other Exception. The pid file
    (if any) is closed and the end-to-end duration is recorded before
    exiting.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg (not `error`) so the imported `error` module is
            # not shadowed inside this function.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails with an error that the
    # autotest server package does not exist, fall back to running the job
    # without server-side packaging. If option warn_no_ssp is specified, that
    # means autoserv is running in a drone that does not support SSP, thus
    # there is no need to stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have an autotest server package. Fall back to
        # not using the server-side package. Logging is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be staged based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging, or no '
                'devserver can be found to stage server-side package. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared against the literal string 'True'.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs before returning.
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        record_autoserv(parser.options, start_time)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000795
mblighbb421852008-03-11 22:36:16 +0000796
# Script entry point: run autoserv only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()