blob: 288ef50484d5ee4a815f9a65f3104f9ea299a58f [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080028
Dan Shi5e2efb72017-02-07 11:40:23 -080029try:
30 from chromite.lib import metrics
31except ImportError:
32 metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080033
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080034try:
35 from autotest_lib.puppylab import results_mocker
36except ImportError:
37 results_mocker = None
38
# Shared global configuration object; run_autoserv() reads AUTOSERV settings
# (e.g. auto_start_servod) from it.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070044
mbligh9ff89cd2009-09-03 20:28:17 +000045
Kevin Cheng9b6930f2016-07-20 14:57:15 -070046from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000047from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000048from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070049from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070050from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070051from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070052from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070053from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070054from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070055from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000056from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000057
Paul Hobbs20cc72a2016-08-30 16:57:05 -070058
# Control segment to stage server-side package. _stage_ssp() executes this
# file and reads `ssp_url` and `error_msg` from the names it defines.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start/stop servod in a moblab. START_SERVOD_CMD is a
# template that is formatted with (board, port).
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
66
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the signal that was received (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler terminates the process, so the message must not claim that
    # the signal is being ignored.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080070
Dan Shicf4d2032015-03-12 15:04:21 -070071
72def _get_machines(parser):
73 """Get a list of machine names from command line arg -m or a file.
74
75 @param parser: Parser for the command line arguments.
76
77 @return: A list of machine names from command line arg -m or the
78 machines file specified in the command line arg -M.
79 """
80 if parser.options.machines:
81 machines = parser.options.machines.replace(',', ' ').strip().split()
82 else:
83 machines = []
84 machines_file = parser.options.machines_file
85 if machines_file:
86 machines = []
87 for m in open(machines_file, 'r').readlines():
88 # remove comments, spaces
89 m = re.sub('#.*', '', m).strip()
90 if m:
91 machines.append(m)
92 logging.debug('Read list of machines from file: %s', machines_file)
93 logging.debug('Machines: %s', ','.join(machines))
94
95 if machines:
96 for machine in machines:
97 if not machine or re.search('\s', machine):
98 parser.parser.error("Invalid machine: %s" % str(machine))
99 machines = list(set(machines))
100 machines.sort()
101 return machines
102
103
def _stage_ssp(parser):
    """Stage the server-side package (SSP) for this autoserv run.

    The staging itself is delegated to the control segment at
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the
    machine dictionaries and the build to take server-side code from. The
    control segment is expected to define `ssp_url` and `error_msg`; it may
    raise if staging fails for reasons other than the package being absent.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    host_dicts = server_job.get_machine_dicts(
            _get_machines(parser), parser.options.lab,
            parser.options.host_attributes)

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': host_dicts, 'image': source_build}

    # Names assigned by the control segment are collected here.
    staged = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals, staged)
    return staged['ssp_url'], staged['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700135
136
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    that its paths point inside the container, runs that command in the
    container and finally destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Whatever container setup or the run inside the
                       container raised; a FAIL entry is recorded on the job
                       before the exception is re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    # The flag may be absent in its literal form (e.g. it was abbreviated on
    # the command line); do not crash in that case.
    if '--require-ssp' in args:
        args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Remove the flag together with the parent job id that follows it.
        # Slice deletion is safe even if the value happens to be missing.
        del args[index:index + 2]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # NOTE(review): only arguments containing a space are quoted, and an
    # embedded single quote is not escaped.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # Looking for logs must never mask the original failure, so a missing
        # results folder or debug directory is treated as "no logs".
        debug_files = []
        if results:
            try:
                debug_files = os.listdir(os.path.join(results, 'debug'))
            except OSError:
                debug_files = []
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                    'chromeos/autotest/experimental/execute_job_in_ssp'
                    ).increment(fields={'success': success})
        finally:
            # Destroy the container even if reporting the metric failed.
            test_container.destroy()
228
229
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    """
    if results:
        # `sudo -n` never prompts for a password.
        owner_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(owner_cmd)
        group_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(group_cmd)
Dan Shi3f1b8a52015-04-21 11:11:06 -0700247
248
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the host cannot be looked up in
    the AFE, or when the host's servo_host attribute is not local. Failures
    to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line; pass them to ps as a single
        # comma-separated list instead of interpolating raw multi-line output
        # into the command.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
295
296
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds a server_job from the parsed options, runs either the requested
    special task or the control file, and then terminates the process with
    sys.exit(): status 0 if the job ran without raising, 1 otherwise.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Records the termination in the pid file, fixes result permissions,
        # destroys the SSP container (if any) and then kills the whole process
        # group with SIGKILL.
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the command line options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Fall back to the parse_job value when no execution tag was given.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    # Special tasks do not need a control file argument.
    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional keyword arguments are only passed to server_job when set.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    # Outer try: turn any failure into exit_code 1 and always flush metrics.
    # Inner try: always close the job and, for special tasks, build the
    # result summary.
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                site_utils.collect_result_sizes(results)
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    # Report the outcome through the pid file before exiting.
    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000549
550
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given. With no
    machine at all the duration is recorded against 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    states = job_overhead.STATUS
    task_mapping = {
            'reset': states.RESETTING, 'verify': states.VERIFYING,
            'provision': states.PROVISIONING, 'repair': states.REPAIRING,
            'cleanup': states.CLEANING, 'collect_crashinfo': states.GATHERING}
    # Names of the task options that are switched on for this run.
    requested = [task for task in task_mapping
                 if getattr(options, task, False) == True]
    if requested:
        status = task_mapping[requested[0]]
    else:
        status = states.RUNNING

    is_special_task = status not in [states.RUNNING, states.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
581
582
def main():
    """Run autoserv from the command line.

    Parses the command line, prepares the results directory, optionally
    stages the server-side package (SSP), configures logging, opens the
    pidfile when requested and finally hands over to run_autoserv() (or to
    the results mocker when testing mode is enabled in the global config).

    The process always terminates through sys.exit(): 1 for usage or
    results-directory errors, otherwise the exit code produced by the job
    (1 for any uncaught exception).
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Defined up front so the --use_existing_results check further down is
    # well-defined even when logging (and hence the results dir) is disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these marker files means a previous job already used the
        # directory.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Deliberately not named `error`: that would shadow the imported
            # autotest_lib.client.common_lib.error module inside main().
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side
    # packaging. If option warn_no_ssp is specified, that means autoserv is
    # running in a drone that does not support SSP, thus no need to stage
    # server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file,
    # no machines, or the control file cannot be parsed.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000753
mblighbb421852008-03-11 22:36:16 +0000754
# Script entry point: only run autoserv when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()