#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import datetime
Paul Hobbs20cc72a2016-08-30 16:57:05 -070010import contextlib
Fang Deng042c1472014-10-23 13:56:41 -070011import getpass
12import logging
13import os
14import re
Prathmesh Prabhu46047362018-03-16 10:33:19 -070015import shutil
Fang Deng042c1472014-10-23 13:56:41 -070016import signal
Dan Shicf4d2032015-03-12 15:04:21 -070017import socket
Fang Deng042c1472014-10-23 13:56:41 -070018import sys
19import traceback
20import time
21import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000022
mblighf5427bb2008-04-09 15:55:57 +000023import common
Dan Shi4f8c0242017-07-07 15:34:49 -070024from autotest_lib.client.bin.result_tools import utils as result_utils
25from autotest_lib.client.bin.result_tools import view as result_view
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import control_data
Prathmesh Prabhu9a631082018-05-11 17:30:09 -070027from autotest_lib.client.common_lib import enum
Dan Shi32649b82015-08-29 20:53:36 -070028from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070029from autotest_lib.client.common_lib import global_config
Prathmesh Prabhu9a631082018-05-11 17:30:09 -070030from autotest_lib.client.common_lib import host_queue_entry_states
31from autotest_lib.client.common_lib import host_states
Allen Lif146e872017-08-15 18:24:31 -070032from autotest_lib.server import results_mocker
Prathmesh Prabhu46047362018-03-16 10:33:19 -070033from autotest_lib.server.cros.dynamic_suite import suite
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080034
Dan Shi5e2efb72017-02-07 11:40:23 -080035try:
36 from chromite.lib import metrics
Paul Hobbse9fd5572017-08-22 02:48:25 -070037 from chromite.lib import cloud_trace
Dan Shi5e2efb72017-02-07 11:40:23 -080038except ImportError:
Prathmesh Prabhud16c8012017-08-28 11:42:46 -070039 from autotest_lib.client.common_lib import utils as common_utils
40 metrics = common_utils.metrics_mock
Paul Hobbse9fd5572017-08-22 02:48:25 -070041 import mock
42 cloud_trace = mock.MagicMock()
Prathmesh Prabhua5eecda2016-11-23 16:48:40 -080043
# Shared global_config instance; used below to read AUTOSERV settings.
_CONFIG = global_config.global_config


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070049
mbligh9ff89cd2009-09-03 20:28:17 +000050
Kevin Cheng9b6930f2016-07-20 14:57:15 -070051from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000052from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000053from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070054from autotest_lib.server import utils as server_utils
Paul Hobbs20cc72a2016-08-30 16:57:05 -070055from autotest_lib.server import site_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070056from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070057from autotest_lib.site_utils import job_directories
Dan Shicf4d2032015-03-12 15:04:21 -070058from autotest_lib.site_utils import lxc
Ben Kwa966db082017-06-05 14:17:23 -070059from autotest_lib.site_utils.lxc import utils as lxc_utils
showard75cdfee2009-06-10 17:40:41 +000060from autotest_lib.client.common_lib import pidfile, logging_manager
mbligh92c0fc22008-11-20 16:52:23 +000061
Paul Hobbs20cc72a2016-08-30 16:57:05 -070062
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start/stop servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'

# Parent directory of the directory containing this file.
_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
# File name used for a control file staged into the results directory when
# the control file is selected by name.
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'
73
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and exit with status 1.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).
    """
    # The message used to claim the alarm was ignored, which contradicted the
    # sys.exit(1) that immediately follows.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080077
Dan Shicf4d2032015-03-12 15:04:21 -070078
79def _get_machines(parser):
80 """Get a list of machine names from command line arg -m or a file.
81
82 @param parser: Parser for the command line arguments.
83
84 @return: A list of machine names from command line arg -m or the
85 machines file specified in the command line arg -M.
86 """
87 if parser.options.machines:
88 machines = parser.options.machines.replace(',', ' ').strip().split()
89 else:
90 machines = []
91 machines_file = parser.options.machines_file
92 if machines_file:
93 machines = []
94 for m in open(machines_file, 'r').readlines():
95 # remove comments, spaces
96 m = re.sub('#.*', '', m).strip()
97 if m:
98 machines.append(m)
99 logging.debug('Read list of machines from file: %s', machines_file)
100 logging.debug('Machines: %s', ','.join(machines))
101
102 if machines:
103 for machine in machines:
104 if not machine or re.search('\s', machine):
105 parser.parser.error("Invalid machine: %s" % str(machine))
106 machines = list(set(machines))
107 machines.sort()
108 return machines
109
110
def _stage_ssp(parser, resultsdir):
    """Stage server-side package.

    This function calls a control segment to stage server-side package based on
    the job and autoserv command line option. The detail implementation could
    be different for each host type. Currently, only CrosHost has
    stage_server_side_package function defined.
    The script returns None if no server-side package is available. However,
    it may raise exception if it failed for reasons other than artifact (the
    server-side package) not found.

    @param parser: Command line arguments parser passed in the autoserv process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder.
            NOTE(review): earlier documentation said resultsdir may be None,
            but it is passed to os.path.join() below, which does not accept
            None -- confirm with callers.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    machine_names = _get_machines(parser)
    machines_list = server_job.get_machine_dicts(
            machine_names=machine_names,
            store_dir=os.path.join(resultsdir, parser.options.host_info_subdir),
            in_lab=parser.options.lab,
            use_shadow_store=not parser.options.local_only_host_info,
            host_attributes=parser.options.host_attributes,
    )

    # The control segment reads 'machines' and 'image' from its globals and
    # is expected to leave 'ssp_url' and 'error_msg' in its locals.
    namespace = {'machines': machines_list,
                 'image': parser.options.test_source_build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
    return script_locals['ssp_url'], script_locals['error_msg']
Dan Shicf4d2032015-03-12 15:04:21 -0700148
149
def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so it
    is valid inside the container, runs it there and finally destroys the
    container.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                                            stored in a temp folder.
                    results                 can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the run inside the container
                      raised; a FAIL entry is recorded on the job first.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    # Guard the removal: an unconditional list.remove() raises ValueError when
    # the flag is absent, which would abort here with the container already
    # created and never destroyed.
    if '--require-ssp' in args:
        args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is aready created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # Only arguments containing a space are wrapped in single quotes.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # A missing debug directory (or no results folder at all) counts as
        # "no logs"; checking up front keeps a secondary OSError from hiding
        # the failure that is re-raised below.
        debug_dir = os.path.join(results, 'debug') if results else None
        if debug_dir and os.path.isdir(debug_dir):
            debug_files = os.listdir(debug_dir)
        else:
            debug_files = []
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        test_container.destroy()
236
237
def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    """
    if not results:
        return

    # 'sudo -n' never prompts for a password, so this fails instead of
    # blocking when passwordless sudo is unavailable.
    utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
    utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700255
256
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is best effort: failures are logged and the function returns without
    raising for the RPC and command errors it handles.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.warning('Starting servod is aborted. Host %s is not '
                            'found in AFE.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ('localhost', '127.0.0.1'):
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
303
304
def _control_path_on_disk(control_name):
    """Find the control file corresponding to the given control name, on disk.

    @param control_name: NAME attribute of the control file to fetch.
    @return: Path to the control file.
    @raise error.AutoservError: If no control file, or more than one control
            file, has the given NAME.
    """
    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    # Escape the name so characters such as '.' are matched literally rather
    # than as regular expression operators.
    control_name_predicate = suite.test_name_matches_pattern_predicate(
            '^%s$' % re.escape(control_name))
    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
    if not tests:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(tests) > 1:
        logging.error('Found more than one control file with NAME %s: %s',
                      control_name, [t.path for t in tests])
        raise error.AutoservError(
                'Found more than one control file with NAME %s' % control_name)
    return tests[0].path
324
325
def _stage_control_file(parser, results_dir):
    """Stage the control file to execute, returning the path to staged file.

    The control file named by parser.options.control_name is looked up on the
    local filesystem and copied into results_dir.

    @param parser: Parser for autoserv options.
    @param results_dir: Results directory to stage the control file into.
    @return: Path to the staged control file, i.e. results_dir joined with
             the staged file name.
    """
    # TODO(pprabhu) This function currently always stages the control file from
    # the local filesystem. This means that both
    # parser.options.test_source_build and parser.options.image are ignored.
    # Support will be added once skylab gains support to run SSP tests.
    control_path = _control_path_on_disk(parser.options.control_name)
    new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    shutil.copy2(control_path, new_control)
    return new_control
341
342
343def _tweak_arguments_for_control_file(parser, control):
344 """Tweak parser arguments to pass in control.
345
346 autoserv running within an SSP container may not support the --test-name
347 argument. We also do not want to duplicate the effort and logic to obtain
348 the right control file outside and inside the SSP container. Instead, we
349 tweak the parser commandline in order to pass in the given control file.
350 """
351 # control_name overrides the control argument, so unset it to force the
352 # autoserv re-execution to use the control file set here.
353 parser.control_name = None
354 if parser.args:
355 parser.args[0] = control
356 else:
357 parser.args.append(control)
358
359
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Installs signal handlers, builds the server job from the parsed options,
    runs the requested special task or test, and always terminates the process
    through sys.exit(): status 0 on success, 1 if anything raised while the job
    was running.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill the whole process group, including this process.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not avaliable to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if parser.options.control_name:
        control = _stage_control_file(parser, results)
        _tweak_arguments_for_control_file(parser, control)
    elif parser.args:
        control = parser.args[0]
    else:
        # Special tasks do not have any control file at all.
        control = None

    if not any([is_special_task, control]):
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'test_retry': test_retry,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
    }
    # Optional arguments are only passed when they were actually supplied.
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     auto_flush=False, short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except Exception:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            logging.debug(
                                    'Ignoring failure to correct permissions '
                                    'of %s.', results, exc_info=True)
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except Exception:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Deliberately catch everything: any failure is reported through the
        # process exit status instead of an uncaught exception.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000628
629
# Statuses used to break a job down into phases. Most come from the host
# queue entry states; REPAIRING only exists as a host state.
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
    _qs.QUEUED,
    _qs.RESETTING,
    _qs.VERIFYING,
    _qs.PROVISIONING,
    _hs.REPAIRING,
    _qs.CLEANING,
    _qs.RUNNING,
    _qs.GATHERING,
    _qs.PARSING,
]
_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True)
Paul Hobbs68d98592017-08-22 02:22:49 -0700638
639
def get_job_status(options):
    """Returns the HQE Status for this run.

    The special task flags on |options| are checked in a fixed order (reset,
    verify, provision, repair, cleanup, collect_crashinfo) and the status of
    the first one that is set is returned. If none of them is set (or the
    attribute is missing), the run is reported as RUNNING.

    @param options: parser options.

    @returns: The matching member of _JOB_OVERHEAD_STATUS.
    """
    s = _JOB_OVERHEAD_STATUS
    # Use an ordered sequence of pairs instead of a dict: dict iteration order
    # is arbitrary in Python 2, so with a dict the status picked when several
    # flags are set would depend on hash ordering.
    task_mapping = (
        ('reset', s.RESETTING),
        ('verify', s.VERIFYING),
        ('provision', s.PROVISIONING),
        ('repair', s.REPAIRING),
        ('cleanup', s.CLEANING),
        ('collect_crashinfo', s.GATHERING),
    )
    for task, status in task_mapping:
        if getattr(options, task, False):
            return status
    return s.RUNNING
Fang Deng042c1472014-10-23 13:56:41 -0700652
653
def main():
    """Entry point: parse options, prepare results/logging and run the job.

    The steps, in order:
      - parse the command line; print help and exit(1) if no argument given.
      - resolve the results directory (unless --no_logging) and refuse to
        reuse a directory that already holds results unless
        use_existing_results is set.
      - try to stage the server-side package (SSP) when it is required.
      - configure logging, optionally open the pid file.
      - either mock the results (testing mode) or call run_autoserv().

    The process always terminates through sys.exit() with the exit code of
    the run, except in testing mode where the function simply returns after
    the pid file has been closed.
    """
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Initialised up front: it is only computed when logging is enabled, but
    # it is read again by the use_existing_results check further down.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n"
                         % results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job requires to run with server-side package, try to stage
    # server-side package first. If that fails with error that autotest server
    # package does not exist, fall back to run the job without using
    # server-side packaging. If option warn_no_ssp is specified, that means
    # autoserv is running in a drone that does not support SSP, thus no need
    # to stage server-side package.
    ssp_url = None
    ssp_url_warning = False
    ssp_error_msg = None
    if not parser.options.warn_no_ssp and parser.options.require_ssp:
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be staged based on '
                '`--test_source_build`, host attribute job_repo_url or host '
                'OS version label. It could be that the build to test is '
                'older than the minimum version that supports server-side '
                'packaging, or no devserver can be found to stage server-side '
                'package. The test will be executed without using server-side '
                'packaging. Following is the detailed error:\n%s',
                ssp_error_msg)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared against the string 'True', not a boolean.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        # Always record the exit code in the pid file, even on early return.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000819
mblighbb421852008-03-11 22:36:16 +0000820
# Only run autoserv when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()