blob: e826059b16b674144a697a8c47576bfe1643e5c5 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070027from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.client.common_lib import global_config
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080029
# chromite is an optional dependency: use its metrics module when it can be
# imported, otherwise fall back to the autotest mock.
try:
    from chromite.lib import metrics
except ImportError:
    # `utils` is only bound by the autotest_lib.server imports further down in
    # this file, so it has to be imported here before it can be used.
    from autotest_lib.server import utils
    metrics = utils.metrics_mock
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
# Shorthand for the global configuration object; run_autoserv reads the
# AUTOSERV/auto_start_servod setting through it.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070040
mbligh9ff89cd2009-09-03 20:28:17 +000041
Kevin Cheng9b6930f2016-07-20 14:57:15 -070042from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000043from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000044from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070045from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070046from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070047from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070048from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070049from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070050from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070051from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000052from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000053
Paul Hobbs20cc72a2016-08-30 16:57:05 -070054
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with the board name and the servo port.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod in a moblab.
STOP_SERVOD_CMD = 'sudo stop servod'
62
def log_alarm(signum, frame):
    """Signal handler that logs a received SIGALRM and terminates autoserv.

    @param signum: Number of the signal that was delivered (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits, so the message must not claim the signal is ignored.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080066
Dan Shicf4d2032015-03-12 15:04:21 -070067
68def _get_machines(parser):
69 """Get a list of machine names from command line arg -m or a file.
70
71 @param parser: Parser for the command line arguments.
72
73 @return: A list of machine names from command line arg -m or the
74 machines file specified in the command line arg -M.
75 """
76 if parser.options.machines:
77 machines = parser.options.machines.replace(',', ' ').strip().split()
78 else:
79 machines = []
80 machines_file = parser.options.machines_file
81 if machines_file:
82 machines = []
83 for m in open(machines_file, 'r').readlines():
84 # remove comments, spaces
85 m = re.sub('#.*', '', m).strip()
86 if m:
87 machines.append(m)
88 logging.debug('Read list of machines from file: %s', machines_file)
89 logging.debug('Machines: %s', ','.join(machines))
90
91 if machines:
92 for machine in machines:
93 if not machine or re.search('\s', machine):
94 parser.parser.error("Invalid machine: %s" % str(machine))
95 machines = list(set(machines))
96 machines.sort()
97 return machines
98
99
def _stage_ssp(parser, resultsdir):
    """Stage the server-side package by executing a control segment.

    The control segment STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE is executed
    with the machines and the image to use, and is expected to leave
    'ssp_url' and 'error_msg' in its local namespace. How staging is done may
    differ per host type; currently only CrosHost defines
    stage_server_side_package. Exceptions raised for reasons other than the
    server-side package artifact being unavailable are propagated.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            for autoserv run requires no logging.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    hostnames = _get_machines(parser)
    options = parser.options
    machine_dicts = server_job.get_machine_dicts(
            hostnames, resultsdir, options.lab, options.host_attributes)

    # Without an explicit test_source_build, the server-side test code comes
    # from the build given in --image.
    source_build = options.test_source_build or options.image
    control_globals = {'machines': machine_dicts, 'image': source_build}
    control_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, control_globals,
             control_locals)

    ssp_url = control_locals['ssp_url']
    error_msg = control_locals['error_msg']
    return ssp_url, error_msg
Dan Shicf4d2032015-03-12 15:04:21 -0700136
137
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is
    rewritten to use the paths inside the container, and the resulting
    command is run in the container. The container is destroyed afterwards
    whether or not the run succeeded.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raises Exception: Any error from setting up the container or from running
                       the command inside it is re-raised after a FAIL entry
                       has been recorded on the job where applicable.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    success = False
    # Everything after the container exists runs under try/finally so that a
    # failure while preparing the command line cannot leak the container.
    try:
        args = sys.argv[:]
        # The flag may be absent from argv (e.g. given as an abbreviated
        # option), so only remove it when it is literally present.
        if '--require-ssp' in args:
            args.remove('--require-ssp')
        # --parent_job_id is only useful in autoserv running in host, not in
        # container. Include this argument will cause test to fail for builds
        # before CL 286265 was merged.
        if '--parent_job_id' in args:
            index = args.index('--parent_job_id')
            args.remove('--parent_job_id')
            # Remove the actual parent job id in command line arg.
            del args[index]

        # A dictionary of paths to replace in the command line. Key is the
        # path to be replaced with the one in value.
        paths_to_replace = {}
        # Replace the control file path with the one in container.
        if control:
            container_control_filename = os.path.join(
                    lxc.CONTROL_TEMP_PATH, os.path.basename(control))
            paths_to_replace[control] = container_control_filename
        # Update result directory with the one in container.
        container_result_dir = lxc.RESULT_DIR_FMT % job_folder
        if parser.options.results:
            paths_to_replace[parser.options.results] = container_result_dir
        # Update parse_job directory with the one in container. The assumption
        # is that the result folder to be parsed is always the same as the
        # results_dir.
        if parser.options.parse_job:
            paths_to_replace[parser.options.parse_job] = container_result_dir

        args = [paths_to_replace.get(arg, arg) for arg in args]

        # Apply --use-existing-results, results directory is already created
        # and mounted in container. Apply this arg to avoid exception being
        # raised.
        if '--use-existing-results' not in args:
            args.append('--use-existing-results')

        # Make sure autoserv running in container using a different pid file.
        if '--pidfile-label' not in args:
            args.extend(['--pidfile-label', 'container_autoserv'])

        cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg
                             for arg in args])
        logging.info('Run command in container: %s', cmd_line)
        try:
            test_container.attach_run(cmd_line)
            success = True
        except Exception as e:
            # If the test run inside container fails without generating any
            # log, write a message to status.log to help troubleshooting.
            # `results` may be None and the debug folder may not exist; probe
            # without raising so the original error is the one re-raised.
            debug_dir = os.path.join(results, 'debug') if results else None
            has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                                   os.listdir(debug_dir))
            if not has_debug_files:
                job.record('FAIL', None, None,
                           'Failed to run test inside the container: %s. '
                           'Check logs in ssp_logs folder for more details.'
                           % e)
            raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        test_container.destroy()
229
230
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can
    be unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    """
    if results:
        # Owner first, then group; both are applied recursively through sudo.
        for command, owner_id in (('sudo -n chown -R %s "%s"', os.getuid()),
                                  ('sudo -n chgrp -R %s "%s"', os.getgid())):
            utils.run(command % (owner_id, results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700248
249
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running
    with different board or port.

    Nothing is done outside of moblab, when the host cannot be looked up in
    the AFE, or when the dut's servo_host attribute does not point at the
    local machine. Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host entry there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s '
                            'servo_host attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line; turn the output into the
        # comma-separated list that `ps -p` accepts instead of interpolating
        # raw newlines into the command.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
296
297
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin redirected to /dev/null, own session when not
    already a process group leader, signal handlers, $USER), builds the
    server job from the parsed command line options and runs either the
    requested special task or the job itself. The exit code is written to the
    pid file, when one is managed, and the process then exits through
    sys.exit().

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort in the pid file, clean up and kill the group.

        @param signum: Number of the received signal (unused).
        @param frame: Interrupted stack frame (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill every process in autoserv's own process group, itself included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the command line options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Without an explicit execution tag, the parse_job value is used as tag.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional keyword arguments are only passed on when they were supplied.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    # Outer try: turn any failure into exit code 1 and always flush metrics.
    # Inner try: always close the job and summarize special task results.
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000559
560
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. When no machine
    is given, the duration is recorded against the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = (options.machines.replace(',', ' ').strip().split()
                if options.machines else [])
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Build a real list so it can be indexed no matter what filter() would
    # return. The comparison against True is kept as is, so only options
    # equal to True select a task.
    match = [task for task in task_mapping
             if getattr(options, task, False) == True]
    status = task_mapping[match[0]] if match else s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
591
592
def main():
    """Parse the autoserv command line, set up results/logging, run the job.

    The sequence is:
      1. Parse arguments; with no arguments at all, print help and exit 1.
      2. Unless options.no_logging is set, resolve the results directory
         (defaulting to a timestamped 'results.*' name), refuse to reuse a
         directory that already holds job artifacts unless
         options.use_existing_results is set, and create it if needed.
      3. Optionally stage the server-side package (SSP) via _stage_ssp().
      4. Configure logging (under '<results>/ssp_logs' when SSP is used).
      5. Optionally open a pidfile, then call run_autoserv().
      6. Always close the pidfile and record the run duration.

    This function does not return: it always ends with sys.exit(). The exit
    code is 0 on success, the code carried by a SystemExit raised from
    run_autoserv(), or 1 for any other exception.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Initialised up front: the use_existing_results check further down reads
    # this even when logging is disabled and the directory scan is skipped.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as belonging to an earlier
        # job.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Logging is not configured yet, so report straight to stderr.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we have verified that there is no leftover results dir
        # from previous jobs, create the result dir, since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails because the autotest server
    # package does not exist, fall back to running the job without server-side
    # packaging. If option warn_no_ssp is specified, autoserv is running on a
    # drone that does not support SSP, so there is no need to stage the
    # server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have an autotest server package. Fall back to
        # not using the server-side package. The warning is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging is only used if it is required and the package is
    # available. If warn_no_ssp is specified, autoserv is running on a drone
    # that does not support SSP and a warning will be logged; it must
    # therefore not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    try:
        try:
            run_autoserv(pid_file_manager, results, parser, ssp_url,
                         use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000714
mblighbb421852008-03-11 22:36:16 +0000715
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()