blob: 1901364d87619539ceeba23de5817f82529c50d6 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Handle on the global autotest configuration; every config lookup in this
# file goes through it.
_CONFIG = global_config.global_config

# Value of AUTOSERV/require_atfork_module; this lookup falls back to True when
# the option is not set.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

# Patch os.fork and the logging module before the server modules below are
# imported.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks. http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # atfork could not be imported. Abort only when the configuration asks for
    # it. This lookup falls back to False, unlike require_atfork above, which
    # falls back to True.
    # NOTE(review): confirm the two different defaults are intentional.
    # global_config is already imported at the top of the file.
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        # The import error itself is printed to stdout, not stderr.
        print e
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000060
Kevin Cheng9b6930f2016-07-20 14:57:15 -070061from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000062from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000063from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070064from autotest_lib.server import utils as server_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070065from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070066from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070067from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070068from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070069from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000070from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080071from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000072
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start and stop servod in a moblab. The start command is a
# format string taking the board name and the servod port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
80
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event, then terminate the process.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raise SystemExit: Always raised, with exit status 1.
    """
    # The handler does not ignore the alarm; it exits. The log message says so
    # to avoid misleading whoever reads the log afterwards.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080084
Dan Shicf4d2032015-03-12 15:04:21 -070085
86def _get_machines(parser):
87 """Get a list of machine names from command line arg -m or a file.
88
89 @param parser: Parser for the command line arguments.
90
91 @return: A list of machine names from command line arg -m or the
92 machines file specified in the command line arg -M.
93 """
94 if parser.options.machines:
95 machines = parser.options.machines.replace(',', ' ').strip().split()
96 else:
97 machines = []
98 machines_file = parser.options.machines_file
99 if machines_file:
100 machines = []
101 for m in open(machines_file, 'r').readlines():
102 # remove comments, spaces
103 m = re.sub('#.*', '', m).strip()
104 if m:
105 machines.append(m)
106 logging.debug('Read list of machines from file: %s', machines_file)
107 logging.debug('Machines: %s', ','.join(machines))
108
109 if machines:
110 for machine in machines:
111 if not machine or re.search('\s', machine):
112 parser.parser.error("Invalid machine: %s" % str(machine))
113 machines = list(set(machines))
114 machines.sort()
115 return machines
116
117
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv run.

    The staging itself is delegated to the control segment
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the
    machines and the build ('image') taken from the command line. The detail
    implementation could be different for each host type. The control segment
    leaves None in 'ssp_url' if no server-side package is available, and may
    raise if staging failed for reasons other than the package not being
    found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    hosts = _get_machines(parser)
    if bool(parser.options.lab):
        # In the lab each machine is handed over as a dict that also carries
        # the first AFE host object matching its hostname.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = [{'hostname': name,
                  'afe_host': afe.get_hosts(hostname=name)[0]}
                 for name in hosts]

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': hosts, 'image': build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)
    return script_locals['ssp_url']
151
152
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is rewritten
    to use the paths inside the container and then executed there. The
    container is destroyed once the run is over, whether or not it succeeded.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.

    @raise Exception: Any error raised while setting up the container or
                      running the command inside it is re-raised after a FAIL
                      entry was recorded on the job.
    @raise ValueError: If '--require-ssp' is not part of sys.argv.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the flag
        # is removed, its value sits at the flag's former index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        container_parse_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
        # Use the directory computed in this branch: container_result_dir is
        # not defined when parser.options.results is not set.
        paths_to_replace[parser.options.parse_job] = container_parse_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # A missing debug folder (or no results folder at all) counts as "no
        # log"; looking it up must not raise and hide the original error.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Always destroy the container, even if reporting the stats
            # failed, so that it is not leaked.
            test_container.destroy()
249
250
def correct_results_folder_permission(results):
    """Hand the results folder over to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. Owner and group are changed recursively, through sudo, to
    the uid and gid of the current process, so parsing job can access the
    results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done if it is
                    empty or None.

    @raise error.CmdError: If one of the sudo commands failed. The failure is
                           posted to the metadata db before being re-raised.
    """
    if results:
        # Each template is paired with the callable providing the id to apply;
        # the id is only looked up right before its command runs.
        ownership_changes = (
                ('sudo -n chown -R %s "%s"', os.getuid),
                ('sudo -n chgrp -R %s "%s"', os.getgid),
        )
        try:
            for template, get_id in ownership_changes:
                utils.run(template % (get_id(), results))
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700276
277
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the AFE can not be reached, or
    when the dut's servo_host attribute is not localhost/127.0.0.1. Failures
    to stop or start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Join them with commas so the ps
        # command stays a single, valid command line even when several servod
        # processes exist (and carries no trailing newline).
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
324
325
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    The function detaches stdin, installs the signal handlers, builds the
    server job from the command line options and runs either the requested
    special task (repair, verify, provision, reset, cleanup) or the control
    file. It never returns normally: it always ends with sys.exit().

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
                             May be None (or any false value), in which case
                             no pid file is updated.
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: Always. The exit code is 0 on success and 1 if any
                       exception was raised while running the job.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Close the pid file, clean up the container and kill the group.

        @param signum: Number of the received signal (unused).
        @param frame: Stack frame that was interrupted (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                # sys.exc_info() is a 3-tuple; wrap it so that it is formatted
                # as a single value. Passing it bare to '%' raises TypeError,
                # which would escape the handler before the process group is
                # killed.
                metadata['error'] = 'Exception: %s' % (sys.exc_info(),)
                # Handle any exception so the autoserv process can be aborted.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill the whole process group, this process included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Without an explicit execution tag, the parse_job value is used as tag.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional arguments are only passed to the job when they were given.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    try:
        try:
            if repair:
                # servod is only started automatically for a single machine.
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host of the job, whatever the outcome was.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Any failure of the job turns into exit code 1; the traceback is
        # printed instead of being propagated.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000575
576
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine was given. A run without
    any machine is recorded against the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []
    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    states = job_overhead.STATUS
    task_to_status = {
            'reset': states.RESETTING, 'verify': states.VERIFYING,
            'provision': states.PROVISIONING, 'repair': states.REPAIRING,
            'cleanup': states.CLEANING,
            'collect_crashinfo': states.GATHERING}
    # Names of the special-task options that are set to True on the command
    # line; the first one found decides the status.
    enabled_tasks = [task for task in task_to_status
                     if getattr(options, task, False) == True]
    if enabled_tasks:
        status = task_to_status[enabled_tasks[0]]
    else:
        status = states.RUNNING
    is_special_task = status not in [states.RUNNING, states.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
607
608
def main():
    """Entry point: parse the command line, set up logging and run autoserv.

    The steps, in order:
      1. Read the list of tests whose run time should be measured.
      2. Parse the command line; print help and exit(1) if no argument given.
      3. Stage the server-side package (SSP) when it is required.
      4. Prepare the results directory (unless logging is disabled).
      5. Configure logging and, if requested, open the pid file.
      6. Start a run-time timer when the control file's test is white listed.
      7. Call run_autoserv(), or mock the results when testing mode is on.
      8. Always close the pid file, stop the timer and record the duration.

    The process is terminated through sys.exit() with the resulting exit
    code, except in testing mode where the function simply returns.
    """
    start_time = datetime.datetime.now()
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = _CONFIG.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side
    # packaging. If option warn_no_ssp is specified, that means autoserv is
    # running in a drone that does not support SSP, thus no need to stage
    # server-side package.
    ssp_url = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg so the imported `error` module is not shadowed.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging.')

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    # Bound up front: the testing-mode branch below reads test_name even when
    # no control file was parsed (e.g. no machines given).
    test_name = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(
                        parser.args[0], raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(
                        ',', ' ').strip().split()
                try:
                    afe = frontend.AFE()
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError):
                    # Ignore error if RPC failed to get board
                    pass
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode needs the optional results_mocker module and is switched
    # off for any label that matches one of the configured exceptions.
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000802
mblighbb421852008-03-11 22:36:16 +0000803
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()