blob: 6fd9d31776c6ce6b9fa3de0fb23e9c4d5bacf078 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
Prathmesh Prabhu46047362018-03-16 10:33:19 -070015import shutil
Fang Deng042c1472014-10-23 13:56:41 -070016import signal
Dan Shicf4d2032015-03-12 15:04:21 -070017import socket
Fang Deng042c1472014-10-23 13:56:41 -070018import sys
19import traceback
20import time
21import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Prathmesh Prabhu9a631082018-05-11 17:30:09 -070027from autotest_lib.client.common_lib import enum
Dan Shi32649b82015-08-29 20:53:36 -070028from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070029from autotest_lib.client.common_lib import global_config
Prathmesh Prabhu9a631082018-05-11 17:30:09 -070030from autotest_lib.client.common_lib import host_queue_entry_states
31from autotest_lib.client.common_lib import host_states
Allen Lif146e872017-08-15 18:24:31 -070032from autotest_lib.server import results_mocker
Prathmesh Prabhu46047362018-03-16 10:33:19 -070033from autotest_lib.server.cros.dynamic_suite import suite
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
Dan Shi5e2efb72017-02-07 11:40:23 -080035try:
36 from chromite.lib import metrics
Paul Hobbse9fd5572017-08-22 02:48:25 -070037 from chromite.lib import cloud_trace
Dan Shi5e2efb72017-02-07 11:40:23 -080038except ImportError:
Prathmesh Prabhud16c8012017-08-28 11:42:46 -070039 from autotest_lib.client.common_lib import utils as common_utils
40 metrics = common_utils.metrics_mock
Paul Hobbse9fd5572017-08-22 02:48:25 -070041 import mock
42 cloud_trace = mock.MagicMock()
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080043
# Shared autotest configuration object; AUTOSERV settings are looked up on it
# later in this file.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070049
mbligh9ff89cd2009-09-03 20:28:17 +000050
Kevin Cheng9b6930f2016-07-20 14:57:15 -070051from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000052from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000053from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070054from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070055from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070056from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070057from autotest_lib.site_utils import job_directories
Dan Shicf4d2032015-03-12 15:04:21 -070058from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070059from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000060from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000061
Paul Hobbs20cc72a2016-08-30 16:57:05 -070062
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with the board name and the servo port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop a running servod.
STOP_SERVOD_CMD = 'sudo stop servod'

# Parent of the directory that contains this file, with symlinks resolved.
_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
# File name used, inside the results directory, for a control file that was
# looked up by its control name.
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'
73
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raise SystemExit: Always, with exit status 1.
    """
    # The handler does not resume execution; the message must say so, otherwise
    # the log contradicts the exit status seen by the caller.
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080077
Dan Shicf4d2032015-03-12 15:04:21 -070078
79def _get_machines(parser):
80 """Get a list of machine names from command line arg -m or a file.
81
82 @param parser: Parser for the command line arguments.
83
84 @return: A list of machine names from command line arg -m or the
85 machines file specified in the command line arg -M.
86 """
87 if parser.options.machines:
88 machines = parser.options.machines.replace(',', ' ').strip().split()
89 else:
90 machines = []
91 machines_file = parser.options.machines_file
92 if machines_file:
93 machines = []
94 for m in open(machines_file, 'r').readlines():
95 # remove comments, spaces
96 m = re.sub('#.*', '', m).strip()
97 if m:
98 machines.append(m)
99 logging.debug('Read list of machines from file: %s', machines_file)
100 logging.debug('Machines: %s', ','.join(machines))
101
102 if machines:
103 for machine in machines:
104 if not machine or re.search('\s', machine):
105 parser.parser.error("Invalid machine: %s" % str(machine))
106 machines = list(set(machines))
107 machines.sort()
108 return machines
109
110
def _stage_ssp(parser, resultsdir):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the machines,
    isolate hash and test source build taken from the command line, and
    returns the two values the segment leaves behind.

    According to the original notes, the staging details depend on the host
    type (only CrosHost implements stage_server_side_package), and the segment
    may raise if staging fails for reasons other than the package not being
    found.

    @param parser: Command line arguments parser passed in the autoserv process.
    @param resultsdir: Folder to store results. The host info store directory
            is created from it by joining parser.options.host_info_subdir.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    machine_names = _get_machines(parser)
    opts = parser.options
    machine_dicts = server_job.get_machine_dicts(
            machine_names=machine_names,
            store_dir=os.path.join(resultsdir, opts.host_info_subdir),
            in_lab=opts.lab,
            use_shadow_store=not opts.local_only_host_info,
            host_attributes=opts.host_attributes,
    )

    # Names visible to the control segment while it runs.
    segment_globals = {
            'machines': machine_dicts,
            'isolate_hash': opts.isolate,
            'image': opts.test_source_build,
    }
    # The segment reports its outcome through variables it assigns.
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url'], segment_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700149
150
def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the command line of this process so it
    is valid inside the container, and runs it there. The container is
    destroyed once the run finishes, whether or not it succeeded.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Any error from container setup or from the run inside
            the container is re-raised after a FAIL entry has been recorded on
            the job (for run failures, only when no debug logs exist).
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        #
        # The debug folder is probed without letting a second exception
        # escape: a missing folder (or results being None) counts as "no
        # logs". A nested try/except is deliberately avoided here, because in
        # Python 2 it would replace the exception re-raised by the bare
        # `raise` below.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                        fields={'success': success})
        test_container.destroy()
237
238
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. The owner and the group are changed, recursively and via
    non-interactive sudo, to those of the current process so parsing job can
    access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    """
    if results:
        # Owner first, then group.
        for cmd_format, owner_id in (
                ('sudo -n chown -R %s "%s"', os.getuid()),
                ('sudo -n chgrp -R %s "%s"', os.getgid())):
            utils.run(cmd_format % (owner_id, results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700256
257
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Does nothing outside of moblab, when the dut's servo_host attribute is not
    local, or when the AFE cannot be reached. Failures to start servod are
    logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # Either command raising CmdError is taken to mean servod is not
        # running, in which case it is simply started below.
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        # Use the shared module constant rather than a duplicated literal.
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
304
305
def _control_path_on_disk(control_name):
    """Find the control file corresponding to the given control name, on disk.

    Control files are searched under the autotest root that contains this
    file.

    @param control_name: NAME attribute of the control file to fetch.
    @return: Path to the control file.
    @raise error.AutoservError: If no control file, or more than one control
            file, has the given NAME.
    """
    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    # The name is a literal, not a pattern: escape it so that characters such
    # as '.' cannot match unrelated control files.
    control_name_predicate = suite.test_name_matches_pattern_predicate(
            '^%s$' % re.escape(control_name))
    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
    if not tests:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(tests) > 1:
        logging.error('Found more than one control file with NAME %s: %s',
                      control_name, [t.path for t in tests])
        raise error.AutoservError(
                'Found more than one control file with NAME %s' % control_name)
    return tests[0].path
325
326
def _stage_control_file(parser, results_dir):
    """Copy the control file named on the command line into the results dir.

    @param parser: Parser for autoserv options; options.control_name selects
                   the control file.
    @param results_dir: Results directory to stage the control file into.
    @return: Path to the staged control file, i.e. a fixed file name joined
             onto results_dir.
    """
    # TODO(pprabhu) This function currently always stages the control file from
    # the local filesystem. This means that both
    # parser.options.test_source_build and parser.options.image are ignored.
    # Support will be added once skylab gains support to run SSP tests.
    source_path = _control_path_on_disk(parser.options.control_name)
    staged_path = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    # copy2 also carries over the file metadata.
    shutil.copy2(source_path, staged_path)
    return staged_path
342
343
344def _tweak_arguments_for_control_file(parser, control):
345 """Tweak parser arguments to pass in control.
346
347 autoserv running within an SSP container may not support the --test-name
348 argument. We also do not want to duplicate the effort and logic to obtain
349 the right control file outside and inside the SSP container. Instead, we
350 tweak the parser commandline in order to pass in the given control file.
351 """
352 # control_name overrides the control argument, so unset it to force the
353 # autoserv re-execution to use the control file set here.
354 parser.control_name = None
355 if parser.args:
356 parser.args[0] = control
357 else:
358 parser.args.append(control)
359
360
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Prepares the process (stdin, process group, signal handlers, $USER),
    builds a server_job from the command line options, and then runs either
    one special task (repair, verify, provision, reset, cleanup) or the
    control file, optionally inside a server-side packaging container.

    This function does not return: it always ends by calling sys.exit(), with
    status 0 on success and 1 if running the job raised.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Clean up on SIGTERM, then kill the whole process group.

        @param signum: Number of the received signal (unused).
        @param frame: Interrupted stack frame (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # SIGKILL is sent to the whole process group, which includes this
        # process itself.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the options used below into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    # NOTE(review): no_tee is read but not used anywhere in this function.
    no_tee = parser.options.no_tee
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    # Pick the control file: a control name given on the command line wins
    # over a positional control file argument.
    if parser.options.control_name:
        control = _stage_control_file(parser, results)
        _tweak_arguments_for_control_file(parser, control)
    elif parser.args:
        control = parser.args[0]
    else:
        # Special tasks do not have any control file at all.
        control = None

    if not any([is_special_task, control]):
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Keyword arguments for the server_job constructor.
    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'test_retry': test_retry,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
    }
    # These two are only passed when they were given on the command line.
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    # Outer try: turn any failure into exit code 1 and always flush metrics.
    # Inner try: always close the job and, for special tasks, summarize the
    # results.
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000629
630
# Stages used to break a job's run down by status. REPAIRING is taken from the
# host states; every other entry comes from the host queue entry states.
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
    _qs.QUEUED,
    _qs.RESETTING,
    _qs.VERIFYING,
    _qs.PROVISIONING,
    _hs.REPAIRING,
    _qs.CLEANING,
    _qs.RUNNING,
    _qs.GATHERING,
    _qs.PARSING,
]
_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True)
Paul Hobbs68d98592017-08-22 02:22:49 -0700639
640
def get_job_status(options):
    """Map the parsed autoserv options to a job breakdown status.

    The first special-task flag found set on `options` decides the status;
    when none of them is set the run is reported as RUNNING.

    @param options: parser options.

    @returns: A member of _JOB_OVERHEAD_STATUS.
    """
    statuses = _JOB_OVERHEAD_STATUS
    status_by_flag = {
        'reset': statuses.RESETTING, 'verify': statuses.VERIFYING,
        'provision': statuses.PROVISIONING, 'repair': statuses.REPAIRING,
        'cleanup': statuses.CLEANING,
        'collect_crashinfo': statuses.GATHERING}
    for flag in status_by_flag:
        # A flag missing from options is treated the same as an unset flag.
        if getattr(options, flag, False):
            return status_by_flag[flag]
    return statuses.RUNNING
Fang Deng042c1472014-10-23 13:56:41 -0700653
654
def main():
    """Entry point: parse the command line, set up results/logging, run the job.

    The steps are, in order:
      * parse the autoserv command line (printing help and exiting with 1 when
        no arguments were given);
      * pick and create the results directory, unless --no_logging was given;
      * try to stage the server-side package (SSP) when it is required;
      * configure logging, optionally open the pidfile;
      * run either the mocked results (testing mode) or run_autoserv().

    Outside of testing mode the function finishes by calling
    sys.exit(exit_code), where exit_code is 0 on success, the code carried by
    a SystemExit raised while running the job, or 1 for any other exception.
    In testing mode it returns after the pidfile (if any) has been closed.
    """
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Bound before the branch below so that the --use_existing_results check
    # further down never hits an unbound local when --no_logging skips the
    # scan of the results directory.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as left over from a
        # previous run.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Logging is not configured yet, so report directly on stderr.
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we have verified that there is no leftover results dir
        # from previous jobs, create the result dir, since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails with an error that the
    # autotest server package does not exist, fall back to running the job
    # without server-side packaging. If option warn_no_ssp is specified,
    # autoserv is running in a drone that does not support SSP, so there is
    # no need to stage the server-side package.
    ssp_url = None
    ssp_error_msg = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have an autotest server package. Fall back to
        # not using server-side packaging. Logging is postponed until
        # logging has been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be staged based on '
                '`--test_source_build`, host attribute job_repo_url or host '
                'OS version label. It could be that the build to test is '
                'older than the minimum version that supports server-side '
                'packaging, or no devserver can be found to stage server-side '
                'package. The test will be executed without using server-side '
                'packaging. Following is the detailed error:\n%s',
                ssp_error_msg)

    if results:
        logging.info('Results placed in %s', results)

        # Wait until now to perform this check, so that it gets properly
        # logged.
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared as a string; only the literal 'True' enables
    # tracing.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        # Record the final exit code in the pidfile on every path, including
        # the early return used by testing mode.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000820
mblighbb421852008-03-11 22:36:16 +0000821
# Only run the job when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()