blob: bdc47a95ba1410da6b6e4768520f123c1d8d7d9e [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
Dan Shi5e2efb72017-02-07 11:40:23 -080030try:
31 from chromite.lib import metrics
32except ImportError:
33 metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080035try:
36 from autotest_lib.puppylab import results_mocker
37except ImportError:
38 results_mocker = None
39
# Handle to the global configuration object; settings are looked up on it with
# get_config_value(section, key, ...).
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070045
mbligh9ff89cd2009-09-03 20:28:17 +000046
Kevin Cheng9b6930f2016-07-20 14:57:15 -070047from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000048from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000049from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070050from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070051from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070052from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070053from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070054from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070055from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070056from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000057from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000058
Paul Hobbs20cc72a2016-08-30 16:57:05 -070059
# Path of the control segment that stages the server-side package. The segment
# is executed as a script and is expected to define `ssp_url` and `error_msg`.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two %s placeholders are filled
# with the board name and the servo port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop a running servod job.
STOP_SERVOD_CMD = 'sudo stop servod'
67
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event loudly, then abort.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler does NOT carry on after the alarm: sys.exit() raises
    # SystemExit in the interrupted frame. The log message must say so,
    # otherwise the log contradicts what actually happens to the process.
    logging.error("Received SIGALARM. Aborting with exit status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080071
Dan Shicf4d2032015-03-12 15:04:21 -070072
73def _get_machines(parser):
74 """Get a list of machine names from command line arg -m or a file.
75
76 @param parser: Parser for the command line arguments.
77
78 @return: A list of machine names from command line arg -m or the
79 machines file specified in the command line arg -M.
80 """
81 if parser.options.machines:
82 machines = parser.options.machines.replace(',', ' ').strip().split()
83 else:
84 machines = []
85 machines_file = parser.options.machines_file
86 if machines_file:
87 machines = []
88 for m in open(machines_file, 'r').readlines():
89 # remove comments, spaces
90 m = re.sub('#.*', '', m).strip()
91 if m:
92 machines.append(m)
93 logging.debug('Read list of machines from file: %s', machines_file)
94 logging.debug('Machines: %s', ','.join(machines))
95
96 if machines:
97 for machine in machines:
98 if not machine or re.search('\s', machine):
99 parser.parser.error("Invalid machine: %s" % str(machine))
100 machines = list(set(machines))
101 machines.sort()
102 return machines
103
104
def _stage_ssp(parser):
    """Stage the server-side package (SSP) for this autoserv invocation.

    The staging itself is delegated to a control segment, which is executed
    with the machines and the image name in its namespace. How staging is
    done may differ per host type. The segment reports its outcome through
    the `ssp_url` and `error_msg` variables it defines; it may also raise if
    staging failed for a reason other than the package not being found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    hostnames = _get_machines(parser)
    machine_dicts = server_job.get_machine_dicts(
            hostnames, parser.options.lab, parser.options.host_attributes)

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': machine_dicts, 'image': source_build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)

    ssp_url = script_locals['ssp_url']
    error_msg = script_locals['error_msg']
    return ssp_url, error_msg
Dan Shicf4d2032015-03-12 15:04:21 -0700136
137
def _has_debug_logs(results):
    """Tell whether the debug folder of a results directory holds any file.

    Kept as a separate function so it can be called from an `except` block
    without disturbing the exception being handled there.

    @param results: Path to the results folder, or None.

    @return: True if <results>/debug exists and is not empty.
    """
    if not results:
        return False
    debug_dir = os.path.join(results, 'debug')
    return os.path.isdir(debug_dir) and bool(os.listdir(debug_dir))


def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, re-runs the current autoserv command line
    inside it (with paths translated to their in-container locations), and
    destroys the container afterwards.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Whatever container setup or the in-container run
                       raised; a FAIL entry is recorded on the job first.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if parser.args and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if machines else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    # The copy of autoserv running inside the container must not try to use
    # SSP again. Guard the removal: an exception here would leave the
    # container that was just created behind.
    if '--require-ssp' in args:
        args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the flag
        # is removed, its value has shifted into the flag's old position.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. A missing
        # debug folder (or no results folder at all) counts as "no log"; it
        # must not raise here and hide the real failure.
        if not _has_debug_logs(results):
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                    fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Always tear the container down, even if reporting failed.
            test_container.destroy()
236
237
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    @raises error.CmdError: If either sudo command fails; the failure is
                            posted to the metadata db before re-raising.
    """
    if not results:
        return

    # (command template, function returning the id to apply), in run order.
    ownership_fixes = (('sudo -n chown -R %s "%s"', os.getuid),
                       ('sudo -n chgrp -R %s "%s"', os.getgid))
    try:
        for cmd_template, get_id in ownership_fixes:
            utils.run(cmd_template % (get_id(), results))
    except error.CmdError as e:
        failure_info = {'error': str(e),
                        'result_folder': results,
                        'drone': socket.gethostname()}
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure_info)
        raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700263
264
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: every failure is logged and the function returns
    without raising for the error cases it knows about.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ('localhost', '127.0.0.1'):
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Splice them into a single
        # comma-separated `ps -p` list so that neither the trailing newline
        # nor a second pid ends up being run as a separate shell command.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
311
312
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds a server_job from the parsed command line, runs either the
    requested special task (repair/verify/provision/reset/cleanup) or the job
    itself, and then exits the process with the resulting exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always; the exit status is 0 on success and 1 if
                        running the job raised anything.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def correct_permission_best_effort():
        """Fix results folder ownership without letting a failure escape.

        Used on the abort path only: an exception there would stop the
        SIGTERM handler before it destroys the container and kills the
        process group.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.exception('Failed to correct permission of results '
                              'folder %s.', results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the container and kill the group.

        @param signum: Number of the signal being handled (unused).
        @param frame: Stack frame interrupted by the signal (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_permission_best_effort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_permission_best_effort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            # Close every host the job opened, whether or not it succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately bare: anything raised while running the job, including
        # SystemExit from a signal handler, becomes a failing exit code.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000575
576
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given. With no machine
    at all, the duration is recorded against the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    status_enum = job_overhead.STATUS
    status_by_task = {
            'reset': status_enum.RESETTING,
            'verify': status_enum.VERIFYING,
            'provision': status_enum.PROVISIONING,
            'repair': status_enum.REPAIRING,
            'cleanup': status_enum.CLEANING,
            'collect_crashinfo': status_enum.GATHERING}
    # Option names from the mapping whose value compares equal to True.
    requested = [task for task in status_by_task
                 if getattr(options, task, False) == True]
    if requested:
        status = status_by_task[requested[0]]
    else:
        status = status_enum.RUNNING

    is_special_task = status not in [status_enum.RUNNING,
                                     status_enum.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
607
608
def main():
    """Parse the command line, prepare results/logging and run autoserv.

    The function stages the server-side package (SSP) when required, creates
    the results directory, configures logging, opens the pid file and then
    hands over to run_autoserv(). When testing mode is enabled in the global
    config, results are mocked for non-task jobs instead and the function
    returns without running anything.

    Unless it returns early in testing mode, the function terminates the
    process through sys.exit(): 0 on success, the code carried by a
    SystemExit raised during the run, or 1 for any other uncaught exception.
    It also exits with 1 when no arguments are given or when the state of the
    results directory contradicts the use_existing_results option.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails because the autotest server
    # package does not exist, fall back to running the job without server-side
    # packaging. If option warn_no_ssp is specified, autoserv is running in a
    # drone that does not support SSP, thus there is no need to stage the
    # server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser)
        # The build does not have an autotest server package. Fall back to
        # not using the server-side package. Logging is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Bound up front: it is consulted below even when logging is disabled and
    # the results directory is therefore never inspected.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    # Take the first argument as control file name, get the test name from
    # the control file. test_name stays None when there is no control file,
    # no machine, or the control file cannot be parsed, so the testing-mode
    # branch below can always read it.
    test_name = None
    if (len(parser.args) > 0 and parser.args[0] != '' and
        parser.options.machines):
        try:
            test_name = control_data.parse_control(parser.args[0],
                                                   raise_warnings=True).name
        except control_data.ControlVariableException:
            logging.debug('Failed to retrieve test name from control file.')

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000779
mblighbb421852008-03-11 22:36:16 +0000780
# Script entry point: run autoserv only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()