blob: 32023aa3321b98fcc556048618e3dfc517324e73 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070027from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070029from autotest_lib.client.common_lib import utils
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080030
Dan Shi5e2efb72017-02-07 11:40:23 -080031try:
32 from chromite.lib import metrics
33except ImportError:
34 metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080035
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080036try:
37 from autotest_lib.puppylab import results_mocker
38except ImportError:
39 results_mocker = None
40
# Shared handle on the global configuration; run_autoserv() reads the
# AUTOSERV/auto_start_servod setting through it.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070046
mbligh9ff89cd2009-09-03 20:28:17 +000047
Kevin Cheng9b6930f2016-07-20 14:57:15 -070048from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000049from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000050from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070051from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070052from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070053from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070054from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070055from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070056from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070057from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000058from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000059
Paul Hobbs20cc72a2016-08-30 16:57:05 -070060
# Control segment to stage server-side package. _stage_ssp() executes this
# file and reads 'ssp_url' and 'error_msg' from the names it defines.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with (board, servo_port).
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod.
STOP_SERVOD_CMD = 'sudo stop servod'
68
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the signal that was delivered (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raise SystemExit: Always, with exit status 1.
    """
    # The process exits right after logging, so the message must say so
    # rather than claim that the signal is being ignored.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080072
Dan Shicf4d2032015-03-12 15:04:21 -070073
74def _get_machines(parser):
75 """Get a list of machine names from command line arg -m or a file.
76
77 @param parser: Parser for the command line arguments.
78
79 @return: A list of machine names from command line arg -m or the
80 machines file specified in the command line arg -M.
81 """
82 if parser.options.machines:
83 machines = parser.options.machines.replace(',', ' ').strip().split()
84 else:
85 machines = []
86 machines_file = parser.options.machines_file
87 if machines_file:
88 machines = []
89 for m in open(machines_file, 'r').readlines():
90 # remove comments, spaces
91 m = re.sub('#.*', '', m).strip()
92 if m:
93 machines.append(m)
94 logging.debug('Read list of machines from file: %s', machines_file)
95 logging.debug('Machines: %s', ','.join(machines))
96
97 if machines:
98 for machine in machines:
99 if not machine or re.search('\s', machine):
100 parser.parser.error("Invalid machine: %s" % str(machine))
101 machines = list(set(machines))
102 machines.sort()
103 return machines
104
105
def _stage_ssp(parser):
    """Stage server-side package.

    Executes the control segment STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE with
    the machines and image taken from the autoserv command line. How staging
    is done may differ per host type; currently only CrosHost defines
    stage_server_side_package.
    The segment yields no url when no server-side package is available, but
    it may raise if staging failed for a reason other than the artifact (the
    server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    machine_dicts = server_job.get_machine_dicts(
            _get_machines(parser), parser.options.lab,
            parser.options.host_attributes)

    # Server-side test code comes from test_source_build when given,
    # otherwise from the build specified in --image.
    image = parser.options.test_source_build or parser.options.image

    segment_globals = {'machines': machine_dicts, 'image': image}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    # The control segment is expected to define both names.
    return segment_locals['ssp_url'], segment_locals['error_msg']
138
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    that its paths point inside the container, runs that command line in the
    container and finally destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the run inside the
                      container raised; a FAIL entry is recorded on the job
                      before the exception is re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the
        # removal above it sits at the same index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # results may be None and the debug folder may not exist; neither
        # case may hide the original failure. No nested try/except is used
        # here on purpose: under Python 2 the bare `raise` below would then
        # re-raise the nested exception instead of the original one.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_logs = bool(debug_dir and os.path.isdir(debug_dir) and
                              os.listdir(debug_dir))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
        finally:
            # The container must be destroyed even if reporting the metric
            # fails.
            test_container.destroy()
230
231
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    """
    if results:
        # Owner first, then group; both recursively through non-interactive
        # sudo.
        chown_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(chown_cmd)
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(chgrp_cmd)
250
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: nothing is done outside of moblab, when the AFE can
    not be queried, when the host is unknown to the AFE or when the dut's
    servo_host attribute does not point at the local machine. A failure to
    start servod is logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
298
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, installs the signal handlers, builds the server job from
    the parsed command line, runs either the requested special task or the
    control file, and exits the process. This function does not return: it
    always ends in sys.exit().

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: Always. The exit status is 0 on success and 1 when
                       running the job raised.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the container and kill the process group.

        @param signum: Number of the received signal (unused).
        @param frame: Interrupted stack frame (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # SIGKILL goes to the whole process group, which includes this
        # process itself.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the parsed options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # The execution tag falls back to the parse_job value.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional settings are passed to the job only when they were given.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    # Outer try: turn any failure into exit code 1 and always flush metrics.
    # Inner try: always close the job and summarize special-task results.
    try:
        try:
            # Exactly one branch runs; special tasks are checked in this
            # fixed order before falling back to running the control file.
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Bare except on purpose: every failure, including SystemExit raised
        # while running the job, is reported as exit code 1.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
561
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given on the command
    line. A run without any machine is recorded under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    statuses = job_overhead.STATUS
    task_mapping = {
            'reset': statuses.RESETTING, 'verify': statuses.VERIFYING,
            'provision': statuses.PROVISIONING, 'repair': statuses.REPAIRING,
            'cleanup': statuses.CLEANING,
            'collect_crashinfo': statuses.GATHERING}
    # Only options that compare equal to True count; a merely truthy value
    # such as a non-empty string does not select the task.
    selected_tasks = [task for task in task_mapping
                      if getattr(options, task, False) == True]
    if selected_tasks:
        status = task_mapping[selected_tasks[0]]
    else:
        status = statuses.RUNNING

    is_special_task = status not in [statuses.RUNNING, statuses.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
593
def main():
    """Entry point: parse options, set up results/logging and run autoserv.

    The function performs the following steps in order:
      1. Parses the command line; prints help and exits with 1 when no
         argument was given.
      2. Unless --no_logging is set, resolves the results directory, refuses
         to reuse a directory that already contains job artifacts (unless
         use_existing_results is set) and creates the directory.
      3. Tries to stage the server-side package (SSP) when it is required.
      4. Configures logging, then emits the messages that had to be
         postponed until logging was available.
      5. Optionally opens a pidfile in the results directory.
      6. Either mocks the results (testing mode) or calls run_autoserv().
      7. Closes the pidfile with the exit code, records the run duration via
         record_autoserv() and exits the process with the exit code.

    In testing mode the function returns normally after the cleanup in step
    7 instead of calling sys.exit().
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Bound up front so that the use_existing_results check further down
    # never hits an unbound local when --no_logging skips the else branch.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the local does not shadow the `error`
            # module imported at the top of the file.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we have verified that there is no leftover results dir
        # from a previous job, create the results dir since the logging
        # system needs to create its log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails because the autotest
    # server package does not exist, fall back to running the job without
    # server-side packaging. If option warn_no_ssp is specified, autoserv is
    # running in a drone that does not support SSP, so there is no need to
    # stage the server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have an autotest server package. Fall back to
        # not using the server-side package. Logging is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the
    # package is available. If warn_no_ssp is specified, autoserv is running
    # in a drone that does not support SSP and a warning will be logged.
    # Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The '
                'test will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

    # Wait until now to perform this check, so it gets properly logged.
    if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as the control file name and get the test name
    # from the control file. test_name stays None when no control file or no
    # machines were given, or when the control file cannot be parsed, so the
    # testing-mode branch below can always read it safely.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
        try:
            test_name = control_data.parse_control(
                    parser.args[0], raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a
    # test for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just
                # return happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000764
mblighbb421852008-03-11 22:36:16 +0000765
# Script entry point: run autoserv only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()