#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Shared handle on the global configuration, used for every AUTOSERV lookup
# in this module.
_CONFIG = global_config.global_config

# Whether the atfork module is required, read from the AUTOSERV section.
# Note that this lookup defaults to True, while the ImportError fallback in
# the atfork bootstrap re-reads the same key with default=False.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
Jakob Jueliche497b552014-09-23 19:11:59 -070043
# Install the atfork fork-safety fixes. This runs at import time, ahead of the
# autotest_lib.server imports that follow it in this module.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks. http://code.google.com/p/python-atfork/
    import warnings
    # Silence the 'logging module already imported' warning before applying
    # the logging fix.
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # atfork is not importable. Abort only when the configuration explicitly
    # requires it (this lookup defaults to False); otherwise carry on without
    # the fixes.
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000060
showard75cdfee2009-06-10 17:40:41 +000061from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000062from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070063from autotest_lib.server import utils as server_utils
Kevin Chengadc99f92016-07-20 08:21:58 -070064from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shicf4d2032015-03-12 15:04:21 -070065from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070066from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070067from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070068from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000069from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080070from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000071
# Control segment to stage server-side package. _stage_ssp executes this file
# and reads the resulting 'ssp_url' variable.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command lines to start and stop servod in a moblab. The two placeholders of
# START_SERVOD_CMD are filled with (board, servo_port) by _start_servod.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'
79
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the delivered signal (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raise SystemExit: Always, with exit status 1.
    """
    # The handler exits instead of resuming, so the message must not claim
    # that the signal is being ignored.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080083
Dan Shicf4d2032015-03-12 15:04:21 -070084
85def _get_machines(parser):
86 """Get a list of machine names from command line arg -m or a file.
87
88 @param parser: Parser for the command line arguments.
89
90 @return: A list of machine names from command line arg -m or the
91 machines file specified in the command line arg -M.
92 """
93 if parser.options.machines:
94 machines = parser.options.machines.replace(',', ' ').strip().split()
95 else:
96 machines = []
97 machines_file = parser.options.machines_file
98 if machines_file:
99 machines = []
100 for m in open(machines_file, 'r').readlines():
101 # remove comments, spaces
102 m = re.sub('#.*', '', m).strip()
103 if m:
104 machines.append(m)
105 logging.debug('Read list of machines from file: %s', machines_file)
106 logging.debug('Machines: %s', ','.join(machines))
107
108 if machines:
109 for machine in machines:
110 if not machine or re.search('\s', machine):
111 parser.parser.error("Invalid machine: %s" % str(machine))
112 machines = list(set(machines))
113 machines.sort()
114 return machines
115
116
def _stage_ssp(parser):
    """Stage server-side package.

    Runs the stage_server_side_package control segment with the machines and
    image taken from the autoserv command line. The detail implementation
    could be different for each host type. The control segment yields None
    if no server-side package is available, but it may raise an exception if
    staging failed for reasons other than the artifact (the server-side
    package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    hosts = _get_machines(parser)
    if bool(parser.options.lab):
        # In the lab, each machine is passed along with its AFE host record.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = [{'hostname': name,
                  'afe_host': afe.get_hosts(hostname=name)[0]}
                 for name in hosts]

    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    source_build = parser.options.test_source_build or parser.options.image
    segment_globals = {'machines': hosts, 'image': source_build}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url']
150
151
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder):
    """Run the server job with server-side packaging.

    Sets up a test container, re-runs the current autoserv command line
    inside it with host paths translated to container paths, and destroys the
    container afterwards.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.

    @raise Exception: Whatever container setup or the run inside the
                      container raised; a FAIL entry is recorded on the job
                      before it is re-raised.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    # Guarded removal: an unconditional remove() would raise ValueError after
    # the container has already been created.
    if '--require-ssp' in args:
        args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # Result directory as seen from inside the container. It is shared by the
    # results and parse_job replacements below.
    container_result_dir = lxc.RESULT_DIR_FMT % job_folder

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    # The path is computed above, so this no longer depends on
    # parser.options.results having been set.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting. The checks
        # avoid raising a secondary error (results is None, or the debug
        # folder was never created) that would hide the original failure.
        has_debug_logs = False
        if results:
            debug_dir = os.path.join(results, 'debug')
            has_debug_logs = (os.path.isdir(debug_dir) and
                              bool(os.listdir(debug_dir)))
        if not has_debug_logs:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting the stats fails.
            test_container.destroy()
248
249
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.

    @raise error.CmdError: If chown or chgrp fails; the failure is posted to
                           the metadata db before being re-raised.
    """
    if results:
        try:
            utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
            utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
        except error.CmdError as cmd_failure:
            failure_info = dict(error=str(cmd_failure),
                                result_folder=results,
                                drone=socket.gethostname())
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700275
276
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the dut's servo_host attribute
    does not point at the local machine, or when the AFE cannot be reached.
    Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Starting servod is aborted. The dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Join them with commas so that `ps -p`
        # receives a single well-formed pid list instead of raw multi-line
        # output being spliced into the command.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
323
324
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, installs the signal handlers, builds the server job from
    the parsed command line and runs either the requested special task
    (repair, verify, provision, reset, cleanup) or the control file. The
    function never returns normally; it always ends in sys.exit.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raise SystemExit: Always; the exit status is 0 on success and 1 if the
                       job raised any exception.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warning('Autoserv is required to run with server-side '
                        'packaging. However, no drone is found to support '
                        'server-side packaging. The test will be executed in '
                        'a drone without server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Clean up and kill the whole process group on SIGTERM.

        @param signum: Number of the delivered signal (unused).
        @param frame: Stack frame that was interrupted (unused).
        """
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                correct_results_folder_permission(results)

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged before
            # the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except:
                    metadata['success'] = False
                    # sys.exc_info() is a 3-tuple; wrap it so it is formatted
                    # as one value instead of being unpacked against a single
                    # %s, which raises TypeError.
                    metadata['error'] = 'Exception: %s' % (sys.exc_info(),)
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.error('Failed to destroy container %s. Error: %s',
                                  container_name, sys.exc_info())
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    correct_results_folder_permission(results)
        finally:
            # The process group must be killed even if any of the cleanup
            # steps above failed; otherwise a SIGTERM could leave autoserv and
            # its children running.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host the job opened, whether or not it succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately broad: any failure, of whatever kind, must be turned
        # into a non-zero exit status after the pid file is written.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000574
575
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded under the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        hostnames = options.machines.replace(',', ' ').strip().split()
    else:
        hostnames = []

    if len(hostnames) > 1:
        # Skip the case where atomic group is used.
        return
    if not hostnames:
        hostnames.append('hostless')

    # Determine the status that will be reported.
    states = job_overhead.STATUS
    task_mapping = {
            'reset': states.RESETTING, 'verify': states.VERIFYING,
            'provision': states.PROVISIONING, 'repair': states.REPAIRING,
            'cleanup': states.CLEANING,
            'collect_crashinfo': states.GATHERING}
    active_tasks = [task for task in task_mapping
                    if getattr(options, task, False) == True]
    if active_tasks:
        status = task_mapping[active_tasks[0]]
    else:
        status = states.RUNNING
    # Everything except a regular run or crashinfo gathering counts as a
    # special task.
    is_special_task = status not in [states.RUNNING, states.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostnames[0], status, duration_secs,
            is_special_task=is_special_task)
606
607
def main():
    """Parse the command line, set up results and logging, and run autoserv.

    Prints the parser help and exits with status 1 when no command line
    arguments are given. Exits with status 1 when the results directory
    already holds results and use_existing_results is not set, or when
    use_existing_results is set but no existing results are found (outside a
    container). Otherwise runs run_autoserv (or, in testing mode, only mocks
    the results), records the elapsed time via record_autoserv and exits with
    the resulting exit code.
    """
    start_time = datetime.datetime.now()
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = _CONFIG.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job is required to run with a server-side package, try to stage
    # the server-side package first. If that fails because the autotest server
    # package does not exist, fall back to running the job without server-side
    # packaging. If option warn_no_ssp is specified, autoserv is running in a
    # drone that does not support SSP, so there is no need to stage the
    # server-side package.
    ssp_url = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url = _stage_ssp(parser)
        # The build does not have an autotest server package. Fall back to not
        # using the server-side package. Logging is postponed until logging
        # has been set up.
        ssp_url_warning = not ssp_url

    # Bound up front so the name exists even when logging is disabled and the
    # results directory is never inspected.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            # Named error_msg (not error) so the imported error module is not
            # shadowed inside this function.
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using server-side packaging.')

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
                not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    # Bound here because the testing-mode branch below reads test_name even
    # when no control file or machines were given and the lookup is skipped.
    test_name = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
                parser.options.machines):
            try:
                test_name = control_data.parse_control(
                        parser.args[0], raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(
                        ',', ' ').strip().split()
                try:
                    afe = frontend_wrappers.RetryingAFE(timeout_min=5,
                                                        delay_sec=10)
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError) as e:
                    # Best effort: if the RPC to get the board failed, carry
                    # on without measuring run time.
                    logging.debug('RPC failed while getting board; run time '
                                  'will not be measured: %s', e)
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode needs the results_mocker module to be importable and is
    # turned off when the job label contains any configured exception.
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    # The last path component of the results option is passed
                    # to the mocker as the machine.
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000802
mblighbb421852008-03-11 22:36:16 +0000803
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()