blob: 44fb1ec1507d4a632aca2f74e4363e9120e6a4c0 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Shared handle to the global autotest configuration.
_CONFIG = global_config.global_config

# NOTE(review): this flag defaults to True, while the ImportError handler below
# re-reads the same key with default=False. Confirm which default is intended
# before relying on this name.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # atfork is optional unless the config explicitly requires it. When it is
    # required, report the problem on stderr (both the hint and the import
    # error itself) and abort.
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000060
Dan Shia1ecd5c2013-06-06 11:21:31 -070061from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000062from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000063from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070064from autotest_lib.server import utils as server_utils
Dan Shicf4d2032015-03-12 15:04:21 -070065from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070066from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070067from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070068from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000069from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080070from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000071
# Path of the control segment that stages the server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Shell command lines used to start and stop servod in a moblab. The start
# command is a format string taking (board, port).
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
79
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and exit with status 1.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).
    """
    # The handler terminates the process, so the message must say so rather
    # than claim that the signal is being ignored.
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080083
Dan Shicf4d2032015-03-12 15:04:21 -070084
85def _get_machines(parser):
86 """Get a list of machine names from command line arg -m or a file.
87
88 @param parser: Parser for the command line arguments.
89
90 @return: A list of machine names from command line arg -m or the
91 machines file specified in the command line arg -M.
92 """
93 if parser.options.machines:
94 machines = parser.options.machines.replace(',', ' ').strip().split()
95 else:
96 machines = []
97 machines_file = parser.options.machines_file
98 if machines_file:
99 machines = []
100 for m in open(machines_file, 'r').readlines():
101 # remove comments, spaces
102 m = re.sub('#.*', '', m).strip()
103 if m:
104 machines.append(m)
105 logging.debug('Read list of machines from file: %s', machines_file)
106 logging.debug('Machines: %s', ','.join(machines))
107
108 if machines:
109 for machine in machines:
110 if not machine or re.search('\s', machine):
111 parser.parser.error("Invalid machine: %s" % str(machine))
112 machines = list(set(machines))
113 machines.sort()
114 return machines
115
116
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv run.

    The work is delegated to the stage_server_side_package control segment,
    which is executed with the machines and the build name in its namespace.
    How staging is done may differ per host type; currently only CrosHost
    defines stage_server_side_package.
    The control segment yields None when no server-side package is available,
    but it may raise if staging failed for any reason other than the artifact
    (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    machines = _get_machines(parser)
    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    build = parser.options.test_source_build or parser.options.image
    script_globals = {'machines': machines, 'image': build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, script_globals,
             script_locals)
    return script_locals['ssp_url']
141
142
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is rewritten
    so that its paths point inside the container, and the resulting command is
    run in the container. The container is destroyed afterwards, whether the
    run succeeded or not.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.

    @raise Exception: Any error from setting up the container or from running
                      the command inside it is re-raised after being recorded
                      in the job status.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Result directory as seen from inside the container. It is computed
    # unconditionally so that both replacements below can use it, even when
    # only one of the two options is set.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder
    # Update result directory with the one in container.
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # NOTE(review): only arguments containing a space are quoted, and embedded
    # single quotes are not escaped.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. A missing
        # results folder or debug folder counts as "no log"; probing for it
        # must not raise and hide the original failure.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        counter_key = '%s.%s' % (lxc.STATS_KEY,
                                 'success' if success else 'fail')
        autotest_stats.Counter(counter_key).increment()
        # metadata is uploaded separately so it can use http to upload.
        metadata = {'drone': socket.gethostname(),
                    'job_id': job_id,
                    'success': success}
        autotest_es.post(use_http=True,
                         type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                         metadata=metadata)
        test_container.destroy()
239
240
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    Tests run with server-side packaging leave the results folder owned by
    root. Owner and group are changed (recursively, through sudo) to those of
    the current process so the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done if it is empty
                    or None.

    @raise error.CmdError: If changing owner or group fails. The failure is
                           posted to the metadata db before being re-raised.
    """
    if results:
        commands = ['sudo -n chown -R %s "%s"' % (os.getuid(), results),
                    'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)]
        try:
            for command in commands:
                utils.run(command)
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700266
267
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    This is a best-effort helper: failures to look up the host or to start
    servod are logged and do not raise.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # NOTE(review): if pgrep reports more than one pid, the ps command
        # below is likely malformed and fails, which is then treated as
        # "servod is not running" - confirm whether that can happen.
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
314
315
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    The function sets up stdin, the process group and the signal handlers,
    builds the server job from the parsed command line options, runs the
    requested special task or the test itself, and finally terminates the
    process through sys.exit(): the exit code is 0 on success and 1 if running
    the job raised any exception. It therefore never returns normally.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no drone is found to support server-side '
                'packaging. The test will be executed in a drone without '
                'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    def _correct_results_permission_for_abort():
        """Fix results folder ownership without ever raising.

        Used by the SIGTERM handler only: a failure here must not prevent the
        handler from reaching the final kill of the process group.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.error('Failed to correct permission of results folder '
                          '%s. Error: %s', results, sys.exc_info()[1])

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            _correct_results_permission_for_abort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Handle any exception so the autoserv process can be aborted.
                exc_info = sys.exc_info()
                metadata['success'] = False
                # exc_info is a 3-tuple: wrap it in a 1-tuple so that it is
                # formatted as one value. Passing it to '%' directly raises
                # TypeError, which would abort this handler.
                metadata['error'] = 'Exception: %s' % (exc_info,)
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, exc_info)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                _correct_results_permission_for_abort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    # The execution tag falls back to the parse_job value when not given.
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host of the job, whatever the outcome was.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Any failure of the job is reported through the exit code; the
        # traceback is printed for troubleshooting.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000565
566
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given with -m. When no
    machine is given, the duration is recorded against the pseudo host
    'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines.append('hostless')

    # Determine the status that will be reported.
    statuses = job_overhead.STATUS
    task_mapping = {
            'reset': statuses.RESETTING, 'verify': statuses.VERIFYING,
            'provision': statuses.PROVISIONING, 'repair': statuses.REPAIRING,
            'cleanup': statuses.CLEANING,
            'collect_crashinfo': statuses.GATHERING}
    # Special task options that are switched on in the command line.
    enabled_tasks = [task for task in task_mapping
                     if getattr(options, task, False) == True]
    if enabled_tasks:
        status = task_mapping[enabled_tasks[0]]
    else:
        status = statuses.RUNNING
    is_special_task = status not in [statuses.RUNNING, statuses.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
597
598
def main():
    """Entry point of autoserv: parse options, set up results/logging, run.

    The steps are, in order:
      1. Read the white list of tests whose run time should be measured.
      2. Parse the command line; print help and exit(1) if no argument given.
      3. Optionally stage the server-side package (SSP).
      4. Resolve/create the results directory and configure logging.
      5. Optionally open the pid file and start a run-time timer.
      6. Either mock the results (testing mode) or call run_autoserv().
      7. Close the pid file, stop the timer, record the autoserv duration
         and exit with the resulting exit code.

    This function never returns normally: it always ends in sys.exit().
    """
    start_time = datetime.datetime.now()
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = _CONFIG.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage
    # server-side package first. If that fails with error that autotest server
    # package does not exist, fall back to run the job without using
    # server-side packaging. If option warn_no_ssp is specified, that means
    # autoserv is running in a drone that does not support SSP, thus no need
    # to stage server-side package.
    ssp_url = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    # Defined up front so that the use_existing_results check further down
    # does not hit an unbound local when logging is disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files means a previous job already used the directory.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg (not error) so that the imported `error` module
            # is not shadowed inside this function.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not have SSP supported and a warning will
    # be logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging.')

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
    if (parser.options.use_existing_results and not resultdir_exists and
        not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    # test_name is read again in the testing-mode branch below, so it must be
    # bound even when no control file / machines were passed in.
    test_name = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(parser.args[0],
                                                       raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(',', ' '
                                                           ).strip().split()
                try:
                    afe = frontend.AFE()
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError):
                    # Ignore error if RPC failed to get board
                    pass
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode needs the optional results_mocker module and is disabled
    # for any job whose label matches one of the configured exceptions.
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # The finally clause below still runs and exits the process
                # with exit_code, so this return never reaches the caller.
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
        sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000792
mblighbb421852008-03-11 22:36:16 +0000793
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()