blob: 889ff07fc9809749d5e25f5813678f0665971a9b [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Shorthand for the global autotest configuration object.
_CONFIG = global_config.global_config

# Value of AUTOSERV/require_atfork_module (treated as True when unset).
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled.
TESTING_MODE_SLEEP_SECS = 1

Jakob Jueliche497b552014-09-23 19:11:59 -070043
# Install the atfork hooks as early as possible. When the module is missing,
# autoserv only aborts if AUTOSERV/require_atfork_module is enabled in the
# configuration; otherwise it carries on without the fork-safety fixes.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # Re-import kept from the original code; the module is already imported
    # near the top of the file, so this is effectively a no-op.
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Send both the hint and the underlying import error to stderr so
        # they end up in the same stream.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000060
Dan Shia1ecd5c2013-06-06 11:21:31 -070061from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000062from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000063from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070064from autotest_lib.server import utils as server_utils
Dan Shicf4d2032015-03-12 15:04:21 -070065from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070066from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070067from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070068from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000069from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080070from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000071
# Path of the control segment that stages the server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines used to start and stop servod in a moblab. The start command
# takes the board name and the servo port, in that order.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
79
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The message used to claim the signal was ignored, which contradicted
    # the sys.exit() below and was misleading when reading logs.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080083
Dan Shicf4d2032015-03-12 15:04:21 -070084
85def _get_machines(parser):
86 """Get a list of machine names from command line arg -m or a file.
87
88 @param parser: Parser for the command line arguments.
89
90 @return: A list of machine names from command line arg -m or the
91 machines file specified in the command line arg -M.
92 """
93 if parser.options.machines:
94 machines = parser.options.machines.replace(',', ' ').strip().split()
95 else:
96 machines = []
97 machines_file = parser.options.machines_file
98 if machines_file:
99 machines = []
100 for m in open(machines_file, 'r').readlines():
101 # remove comments, spaces
102 m = re.sub('#.*', '', m).strip()
103 if m:
104 machines.append(m)
105 logging.debug('Read list of machines from file: %s', machines_file)
106 logging.debug('Machines: %s', ','.join(machines))
107
108 if machines:
109 for machine in machines:
110 if not machine or re.search('\s', machine):
111 parser.parser.error("Invalid machine: %s" % str(machine))
112 machines = list(set(machines))
113 machines.sort()
114 return machines
115
116
def _stage_ssp(parser):
    """Stage the server-side package by executing a control segment.

    The control segment STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE is executed
    with the machine list and the build name in its global namespace, and is
    expected to leave the result in a local variable named `ssp_url`. How the
    package is actually staged is up to the control segment and the host
    type; the original notes say that only CrosHost defines
    stage_server_side_package, and that the script yields None when no
    server-side package is available but may raise for other failures.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: url of the staged server-side package, as set by the control
             segment. None if the server-side package is not found for the
             build.
    """
    target_machines = _get_machines(parser)
    # When test_source_build is not given, fall back to the server-side test
    # code from the build specified in --image.
    options = parser.options
    source_build = options.test_source_build or options.image

    segment_globals = {'machines': target_machines, 'image': source_build}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url']
141
142
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    that host paths point at their in-container equivalents, runs that
    command inside the container, reports success/failure stats and finally
    destroys the container.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None if the autoserv run requires no
                    logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.

    @raises Exception: Whatever container setup or the in-container run
                       raised; the failure is recorded in the job status
                       before being re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Including this argument will cause the test to fail for
    # builds before CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Drop the flag together with the job id that follows it. Slice
        # deletion is safe even if the flag happens to be the last argument.
        del args[index:index + 2]

    # Result directory as seen from inside the container. The parse_job
    # directory is assumed to always be the same as the results directory.
    container_result_dir = lxc.RESULT_DIR_FMT % job_id

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. This no longer
    # depends on --results having been supplied as well.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. The lookup
        # is guarded so that a missing results/debug folder cannot raise here
        # and hide the original failure.
        debug_dir = os.path.join(results, 'debug') if results else None
        has_debug_files = bool(debug_dir and os.path.isdir(debug_dir) and
                               os.listdir(debug_dir))
        if not has_debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        counter_key = '%s.%s' % (lxc.STATS_KEY,
                                 'success' if success else 'fail')
        autotest_stats.Counter(counter_key).increment()
        # metadata is uploaded separately so it can use http to upload.
        metadata = {'drone': socket.gethostname(),
                    'job_id': job_id,
                    'success': success}
        autotest_es.post(use_http=True,
                         type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                         metadata=metadata)
        test_container.destroy()
236
237
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder is owned
    by root. Ownership must be changed to the user running the autoserv
    process so that the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can
    be unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    @raises error.CmdError: If changing owner or group fails; the failure is
                            posted to the metadata db before re-raising.
    """
    if not results:
        return

    # Owner first, then group; the group change is skipped if the owner
    # change fails.
    ownership_cmds = (
        'sudo -n chown -R %s "%s"' % (os.getuid(), results),
        'sudo -n chgrp -R %s "%s"' % (os.getgid(), results),
    )
    try:
        for cmd in ownership_cmds:
            utils.run(cmd)
    except error.CmdError as e:
        failure_info = {'error': str(e),
                        'result_folder': results,
                        'drone': socket.gethostname()}
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure_info)
        raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700263
264
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running
    with different board or port.

    This is best effort: nothing happens outside of moblab, when the AFE
    lookup fails, or when the dut's servo_host attribute is not local. A
    failure to start servod is logged and not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted.', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s '
                            'servo_host attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
311
312
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, moves the process into its own process group, installs
    the signal handlers, builds the server job from the parsed options and
    runs the requested task (repair, verify, provision, reset, cleanup, or
    the control file). Any exception raised by the task is printed and turned
    into exit code 1.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always; the exit status is 0 on success and 1 if the
                        task raised.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no drone is found to support server-side '
                'packaging. The test will be executed in a drone without '
                'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))

    def _correct_permission_best_effort():
        """Fix results ownership without letting a failure stop the abort."""
        try:
            correct_results_folder_permission(results)
        except Exception:
            # correct_results_folder_permission re-raises on failure. While
            # aborting, that must not prevent the container from being
            # destroyed or the process group from being killed.
            logging.error('Failed to correct permission of results folder '
                          '%s. Error: %s', results, sys.exc_info())

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            _correct_permission_best_effort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % sys.exc_info()
                # Handle any exception so the autoserv process can be aborted.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                _correct_permission_best_effort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catch everything: whatever the task raised must end up
        # as a non-zero exit code recorded in the pid file below.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000547
548
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded against the pseudo host 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = (options.machines.replace(',', ' ').strip().split()
                if options.machines else [])
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # A list comprehension rather than filter(), so the result can be indexed
    # and truth-tested regardless of whether filter() returns a list. The
    # explicit comparison with True is kept from the original code.
    match = [task for task in task_mapping
             if getattr(options, task, False) == True]
    status = task_mapping[match[0]] if match else s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
579
580
mbligha46678d2008-05-01 20:00:01 +0000581def main():
Fang Deng042c1472014-10-23 13:56:41 -0700582 start_time = datetime.datetime.now()
Dan Shia1ecd5c2013-06-06 11:21:31 -0700583 # White list of tests with run time measurement enabled.
Dan Shia06f3e22015-09-03 16:15:15 -0700584 measure_run_time_tests_names = _CONFIG.get_config_value(
585 'AUTOSERV', 'measure_run_time_tests', type=str)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700586 if measure_run_time_tests_names:
587 measure_run_time_tests = [t.strip() for t in
588 measure_run_time_tests_names.split(',')]
589 else:
590 measure_run_time_tests = []
jadmanski0afbb632008-06-06 21:10:57 +0000591 # grab the parser
592 parser = autoserv_parser.autoserv_parser
mbligha5cb4062009-02-17 15:53:39 +0000593 parser.parse_args()
mbligha46678d2008-05-01 20:00:01 +0000594
jadmanski0afbb632008-06-06 21:10:57 +0000595 if len(sys.argv) == 1:
596 parser.parser.print_help()
597 sys.exit(1)
mbligha6f13082008-06-05 23:53:46 +0000598
Dan Shicf4d2032015-03-12 15:04:21 -0700599 # If the job requires to run with server-side package, try to stage server-
600 # side package first. If that fails with error that autotest server package
Dan Shic68fefb2015-04-07 10:10:52 -0700601 # does not exist, fall back to run the job without using server-side
602 # packaging. If option warn_no_ssp is specified, that means autoserv is
603 # running in a drone does not support SSP, thus no need to stage server-side
604 # package.
Dan Shicf4d2032015-03-12 15:04:21 -0700605 ssp_url = None
Dan Shi0b754c52015-04-20 14:20:38 -0700606 ssp_url_warning = False
Dan Shic68fefb2015-04-07 10:10:52 -0700607 if (not parser.options.warn_no_ssp and parser.options.require_ssp):
Dan Shicf4d2032015-03-12 15:04:21 -0700608 ssp_url = _stage_ssp(parser)
Dan Shi0b754c52015-04-20 14:20:38 -0700609 # The build does not have autotest server package. Fall back to not
610 # to use server-side package. Logging is postponed until logging being
611 # set up.
612 ssp_url_warning = not ssp_url
Dan Shicf4d2032015-03-12 15:04:21 -0700613
showard75cdfee2009-06-10 17:40:41 +0000614 if parser.options.no_logging:
615 results = None
616 else:
617 results = parser.options.results
mbligh80e1eba2008-11-19 00:26:18 +0000618 if not results:
619 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
620 results = os.path.abspath(results)
showard566d3c02010-01-12 18:57:01 +0000621 resultdir_exists = False
622 for filename in ('control.srv', 'status.log', '.autoserv_execute'):
623 if os.path.exists(os.path.join(results, filename)):
624 resultdir_exists = True
mbligh4608b002010-01-05 18:22:35 +0000625 if not parser.options.use_existing_results and resultdir_exists:
mbligh80e1eba2008-11-19 00:26:18 +0000626 error = "Error: results directory already exists: %s\n" % results
627 sys.stderr.write(error)
628 sys.exit(1)
mbligha788dc42009-03-26 21:10:16 +0000629
630 # Now that we certified that there's no leftover results dir from
631 # previous jobs, let's create the result dir since the logging system
632 # needs to create the log file in there.
633 if not os.path.isdir(results):
634 os.makedirs(results)
showard75cdfee2009-06-10 17:40:41 +0000635
Dan Shic68fefb2015-04-07 10:10:52 -0700636 # Server-side packaging will only be used if it's required and the package
637 # is available. If warn_no_ssp is specified, it means that autoserv is
638 # running in a drone that does not support SSP and a warning will be logged.
639 # Therefore, it should not run with SSP.
640 use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
641 and ssp_url)
642 if use_ssp:
Dan Shie28de552015-05-06 16:51:58 -0700643 log_dir = os.path.join(results, 'ssp_logs') if results else None
Dan Shicf4d2032015-03-12 15:04:21 -0700644 if log_dir and not os.path.exists(log_dir):
645 os.makedirs(log_dir)
646 else:
647 log_dir = results
Dan Shi3f1b8a52015-04-21 11:11:06 -0700648
showard75cdfee2009-06-10 17:40:41 +0000649 logging_manager.configure_logging(
Dan Shicf4d2032015-03-12 15:04:21 -0700650 server_logging_config.ServerLoggingConfig(),
651 results_dir=log_dir,
showard10d84172009-06-18 23:16:50 +0000652 use_console=not parser.options.no_tee,
653 verbose=parser.options.verbose,
654 no_console_prefix=parser.options.no_console_prefix)
Dan Shicf4d2032015-03-12 15:04:21 -0700655
Dan Shi0b754c52015-04-20 14:20:38 -0700656 if ssp_url_warning:
657 logging.warn(
658 'Autoserv is required to run with server-side packaging. '
659 'However, no server-side package can be found based on '
660 '`--image`, host attribute job_repo_url or host label of '
661 'cros-version. The test will be executed without '
662 'server-side packaging supported.')
663
showard75cdfee2009-06-10 17:40:41 +0000664 if results:
mbligha788dc42009-03-26 21:10:16 +0000665 logging.info("Results placed in %s" % results)
mbligh10717632008-11-19 00:21:57 +0000666
mbligh4608b002010-01-05 18:22:35 +0000667 # wait until now to perform this check, so it gets properly logged
Dan Shicf4d2032015-03-12 15:04:21 -0700668 if (parser.options.use_existing_results and not resultdir_exists and
Dan Shiff78f112015-06-12 13:34:02 -0700669 not utils.is_in_container()):
mbligh4608b002010-01-05 18:22:35 +0000670 logging.error("No existing results directory found: %s", results)
671 sys.exit(1)
672
Dan Shicf4d2032015-03-12 15:04:21 -0700673 logging.debug('autoserv is running in drone %s.', socket.gethostname())
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700674 logging.debug('autoserv command was: %s', ' '.join(sys.argv))
mbligh4608b002010-01-05 18:22:35 +0000675
Dan Shicf4d2032015-03-12 15:04:21 -0700676 if parser.options.write_pidfile and results:
mbligh4608b002010-01-05 18:22:35 +0000677 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
678 results)
jadmanskid5ab8c52008-12-03 16:27:07 +0000679 pid_file_manager.open_file()
mblighff7d61f2008-12-22 14:53:35 +0000680 else:
681 pid_file_manager = None
mbligha46678d2008-05-01 20:00:01 +0000682
jadmanskif22fea82008-11-26 20:57:07 +0000683 autotest.BaseAutotest.set_install_in_tmpdir(
684 parser.options.install_in_tmpdir)
685
Dan Shia1ecd5c2013-06-06 11:21:31 -0700686 timer = None
687 try:
688 # Take the first argument as control file name, get the test name from
689 # the control file. If the test name exists in the list of tests with
690 # run time measurement enabled, start a timer to begin measurement.
691 if (len(parser.args) > 0 and parser.args[0] != '' and
692 parser.options.machines):
Dan Shibbc16132013-07-09 16:23:59 -0700693 try:
694 test_name = control_data.parse_control(parser.args[0],
695 raise_warnings=True).name
696 except control_data.ControlVariableException:
697 logging.debug('Failed to retrieve test name from control file.')
698 test_name = None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700699 if test_name in measure_run_time_tests:
700 machines = parser.options.machines.replace(',', ' '
701 ).strip().split()
Dan Shi8eac5af2014-09-17 00:15:15 -0700702 try:
703 afe = frontend.AFE()
704 board = server_utils.get_board_from_afe(machines[0], afe)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800705 timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
706 (board, test_name))
Dan Shi8eac5af2014-09-17 00:15:15 -0700707 timer.start()
708 except (urllib2.HTTPError, urllib2.URLError):
709 # Ignore error if RPC failed to get board
710 pass
Dan Shia1ecd5c2013-06-06 11:21:31 -0700711 except control_data.ControlVariableException as e:
712 logging.error(str(e))
jadmanski0afbb632008-06-06 21:10:57 +0000713 exit_code = 0
Prashanth B6285f6a2014-05-08 18:01:27 -0700714 # TODO(beeps): Extend this to cover different failure modes.
715 # Testing exceptions are matched against labels sent to autoserv. Eg,
716 # to allow only the hostless job to run, specify
717 # testing_exceptions: test_suite in the shadow_config. To allow both
718 # the hostless job and dummy_Pass to run, specify
719 # testing_exceptions: test_suite,dummy_Pass. You can figure out
720 # what label autoserv is invoked with by looking through the logs of a test
721 # for the autoserv command's -l option.
Dan Shia06f3e22015-09-03 16:15:15 -0700722 testing_exceptions = _CONFIG.get_config_value(
Prashanth B6285f6a2014-05-08 18:01:27 -0700723 'AUTOSERV', 'testing_exceptions', type=list, default=[])
Dan Shia06f3e22015-09-03 16:15:15 -0700724 test_mode = _CONFIG.get_config_value(
Prashanth B6285f6a2014-05-08 18:01:27 -0700725 'AUTOSERV', 'testing_mode', type=bool, default=False)
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800726 test_mode = (results_mocker and test_mode and not
727 any([ex in parser.options.label
728 for ex in testing_exceptions]))
729 is_task = (parser.options.verify or parser.options.repair or
730 parser.options.provision or parser.options.reset or
731 parser.options.cleanup or parser.options.collect_crashinfo)
jadmanski0afbb632008-06-06 21:10:57 +0000732 try:
733 try:
Prashanth B6285f6a2014-05-08 18:01:27 -0700734 if test_mode:
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800735 # The parser doesn't run on tasks anyway, so we can just return
736 # happy signals without faking results.
737 if not is_task:
738 machine = parser.options.results.split('/')[-1]
739
740 # TODO(beeps): The proper way to do this would be to
741 # refactor job creation so we can invoke job.record
742 # directly. To do that one needs to pipe the test_name
743 # through run_autoserv and bail just before invoking
744 # the server job. See the comment in
745 # puppylab/results_mocker for more context.
746 results_mocker.ResultsMocker(
Prashanth Balasubramanian22dd2262014-11-28 18:19:18 -0800747 test_name if test_name else 'unknown-test',
748 parser.options.results, machine
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800749 ).mock_results()
750 return
Prashanth B6285f6a2014-05-08 18:01:27 -0700751 else:
Dan Shic68fefb2015-04-07 10:10:52 -0700752 run_autoserv(pid_file_manager, results, parser, ssp_url,
753 use_ssp)
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700754 except SystemExit as e:
jadmanski0afbb632008-06-06 21:10:57 +0000755 exit_code = e.code
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700756 if exit_code:
757 logging.exception(e)
758 except Exception as e:
jadmanski0afbb632008-06-06 21:10:57 +0000759 # If we don't know what happened, we'll classify it as
760 # an 'abort' and return 1.
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700761 logging.exception(e)
jadmanski0afbb632008-06-06 21:10:57 +0000762 exit_code = 1
763 finally:
mblighff7d61f2008-12-22 14:53:35 +0000764 if pid_file_manager:
765 pid_file_manager.close_file(exit_code)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700766 if timer:
767 timer.stop()
Fang Deng042c1472014-10-23 13:56:41 -0700768 # Record the autoserv duration time. Must be called
769 # just before the system exits to ensure accuracy.
770 duration_secs = (datetime.datetime.now() - start_time).total_seconds()
771 record_autoserv(parser.options, duration_secs)
jadmanski0afbb632008-06-06 21:10:57 +0000772 sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000773
mblighbb421852008-03-11 22:36:16 +0000774
# Script entry point: call main() only when this file is executed directly.
mbligha46678d2008-05-01 20:00:01 +0000775if __name__ == '__main__':
jadmanski0afbb632008-06-06 21:10:57 +0000776 main()