blob: e55f20939a5418f9a1abc720d1242ee0a7264974 [file] [log] [blame]
#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Shared handle on the global configuration; used for every AUTOSERV lookup
# below.
_CONFIG = global_config.global_config

# Whether the atfork module is required, read from the AUTOSERV section.
# NOTE(review): this lookup defaults to True while the lookup in the
# ImportError handler below defaults to False, and require_atfork itself is
# not referenced in the visible part of this file -- confirm which default
# is intended.
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

# Patch os.fork and the logging module before the server modules are imported
# further down. If atfork cannot be imported, the process only aborts when the
# configuration explicitly requires it.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # global_config is already imported at the top of the file; this repeated
    # import is redundant but harmless.
    from autotest_lib.client.common_lib import global_config
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Tell the user how to obtain the missing module, show the import
        # error and exit with status 1.
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000060
Dan Shia1ecd5c2013-06-06 11:21:31 -070061from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000062from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000063from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070064from autotest_lib.server import utils as server_utils
Dan Shicf4d2032015-03-12 15:04:21 -070065from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070066from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070067from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070068from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000069from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080070from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000071
# Control segment to stage server-side package. The path is executed by
# _stage_ssp with execfile().
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab. The two placeholders are filled
# with (board, servo port) in _start_servod.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop servod.
# NOTE(review): not referenced in the visible part of this file;
# _start_servod spells the same command out inline.
STOP_SERVOD_CMD = 'sudo stop servod'
79
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler does not ignore the alarm, it exits; the log message now
    # says so instead of claiming that execution continues.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080083
Dan Shicf4d2032015-03-12 15:04:21 -070084
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    Names given with -m may be separated by commas and/or whitespace. When a
    machines file is given with -M it replaces the -m list; every line of the
    file is one machine name, with '#' comments and surrounding whitespace
    removed and empty lines skipped.

    @param parser: Parser for the command line arguments.

    @return: A sorted list of unique machine names from command line arg -m or
             the machines file specified in the command line arg -M. The list
             is empty if neither option is set.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        # Use a context manager so the file is closed as soon as it has been
        # read, instead of leaking the handle until garbage collection.
        with open(machines_file, 'r') as machines_fd:
            for line in machines_fd:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    for machine in machines:
        # A name read from the file may still contain inner whitespace;
        # report it through the option parser's error handler.
        if not machine or re.search(r'\s', machine):
            parser.parser.error("Invalid machine: %s" % str(machine))
    # Drop duplicates and return the names in a stable order.
    return sorted(set(machines))
115
116
def _stage_ssp(parser):
    """Stage server-side package.

    Executes the stage_server_side_package control segment with the machines
    and the build taken from the autoserv command line, and hands back the
    `ssp_url` value the control segment leaves behind. According to the
    original notes, the implementation may differ per host type and only
    CrosHost currently defines stage_server_side_package. The control segment
    yields None when no server-side package is available, but it may raise
    if staging fails for reasons other than the artifact not being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    machines = _get_machines(parser)
    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    build = parser.options.test_source_build or parser.options.image
    control_globals = {'machines': machines, 'image': build}
    control_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, control_globals,
             control_locals)
    return control_locals['ssp_url']
141
142
def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    that the control file, results directory and parse_job directory point at
    the paths used inside the container, runs that command line in the
    container, reports the outcome and destroys the container.

    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
                                       control=control)
    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the flag
        # is removed its value sits at the flag's old index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        container_result_dir = lxc.RESULT_DIR_FMT % job_id
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        # Use the value computed in this branch. The previous code referenced
        # container_result_dir here, which is undefined (NameError) when
        # --parse_job is given without a results directory.
        container_parse_dir = lxc.RESULT_DIR_FMT % job_id
        paths_to_replace[parser.options.parse_job] = container_parse_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # Arguments containing a space are wrapped in single quotes.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even if reporting the outcome failed, so
            # a stats/metadata error cannot leak a container.
            test_container.destroy()
219
220
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    Tests run with server-side packaging leave the results folder owned by
    root. Owner and group are changed recursively to the uid and gid of the
    current process so that the parsing job can access the folder. A failure
    of either command is reported to the metadata db and then re-raised.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done if it is empty
                    or None.

    @raises error.CmdError: If changing owner or group fails.
    """
    if results:
        # Each entry pairs a command template with the function that yields
        # the id to put into it; the id is looked up right before its command
        # runs.
        ownership_fixes = (
            ('sudo -n chown -R %s "%s"', os.getuid),
            ('sudo -n chgrp -R %s "%s"', os.getgid),
        )
        try:
            for cmd_template, get_id in ownership_fixes:
                utils.run(cmd_template % (get_id(), results))
        except error.CmdError as e:
            failure_info = {'error': str(e),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700246
247
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    Nothing is done outside of moblab, when the AFE cannot be queried, when the
    AFE does not know the host, or when the host's servo_host attribute is not
    the local machine. Failures to start servod are logged, not raised.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        if not hosts:
            # Without a host record there are no servo attributes to read;
            # indexing hosts[0] would raise IndexError.
            logging.error('Host %s is not found in AFE. Start servod is '
                          'aborted', machine)
            return
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        # pgrep prints one pid per line. Join the pids with commas so that
        # several matches (or the trailing newline) still form a single valid
        # `ps -p` pid list.
        pids = utils.run('pgrep servod').stdout.split()
        cmd_line = utils.run('ps -fp %s' % ','.join(pids)).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
291
292
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Redirects stdin to /dev/null, moves autoserv into its own process group,
    installs the signal handlers, builds a server_job from the command line
    options, runs the requested task (repair, verify, provision, reset,
    cleanup or the control file) and exits the process.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always. The exit status is 0 if the task finished
                        without raising, 1 otherwise.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))

    def correct_permission_before_abort():
        """Fix results folder ownership without letting a failure escape.

        Used only by handle_sigterm: an exception raised there would skip the
        container cleanup and the final killpg.
        """
        try:
            correct_results_folder_permission(results)
        except Exception:
            logging.error('Failed to correct permission of results folder %s. '
                          'Error: %s', results, sys.exc_info())

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up the container and kill the process group.

        @param signum: Number of the signal being handled (unused).
        @param frame: Stack frame interrupted by the signal (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_permission_before_abort()

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                # sys.exc_info() is a 3-tuple; wrap it in a 1-tuple so that it
                # fills the single %s instead of raising TypeError ("not all
                # arguments converted") inside this handler.
                metadata['error'] = 'Exception: %s' % (sys.exc_info(),)
                # Handle any exception so the autoserv process can be aborted.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_permission_before_abort()

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse_job value when no execution tag is given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    try:
        try:
            # Exactly one task runs; the special tasks take precedence over
            # running the control file, in the order tested here.
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(container_name, job_or_task_id, results,
                                      parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host the job still holds, whatever the outcome.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catch everything: any failure of the task is turned
        # into exit status 1 after the traceback has been printed.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000523
524
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    machines is recorded under the host name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = options.machines.replace(
            ',', ' ').strip().split() if options.machines else []
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported. The tasks are checked in the
    # same order in which run_autoserv dispatches them, so that the recorded
    # status matches the task that ran even if several task options are set.
    # (Picking the first key of a dict depended on hash order.)
    s = job_overhead.STATUS
    task_statuses = (
            ('repair', s.REPAIRING), ('verify', s.VERIFYING),
            ('provision', s.PROVISIONING), ('reset', s.RESETTING),
            ('cleanup', s.CLEANING), ('collect_crashinfo', s.GATHERING))
    status = s.RUNNING
    for task, task_status in task_statuses:
        if getattr(options, task, False) == True:
            status = task_status
            break
    # Running a test and gathering crash info are not counted as special tasks.
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
555
556
mbligha46678d2008-05-01 20:00:01 +0000557def main():
Fang Deng042c1472014-10-23 13:56:41 -0700558 start_time = datetime.datetime.now()
Dan Shia1ecd5c2013-06-06 11:21:31 -0700559 # White list of tests with run time measurement enabled.
Dan Shia06f3e22015-09-03 16:15:15 -0700560 measure_run_time_tests_names = _CONFIG.get_config_value(
561 'AUTOSERV', 'measure_run_time_tests', type=str)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700562 if measure_run_time_tests_names:
563 measure_run_time_tests = [t.strip() for t in
564 measure_run_time_tests_names.split(',')]
565 else:
566 measure_run_time_tests = []
jadmanski0afbb632008-06-06 21:10:57 +0000567 # grab the parser
568 parser = autoserv_parser.autoserv_parser
mbligha5cb4062009-02-17 15:53:39 +0000569 parser.parse_args()
mbligha46678d2008-05-01 20:00:01 +0000570
jadmanski0afbb632008-06-06 21:10:57 +0000571 if len(sys.argv) == 1:
572 parser.parser.print_help()
573 sys.exit(1)
mbligha6f13082008-06-05 23:53:46 +0000574
Dan Shicf4d2032015-03-12 15:04:21 -0700575 # If the job requires to run with server-side package, try to stage server-
576 # side package first. If that fails with error that autotest server package
Dan Shic68fefb2015-04-07 10:10:52 -0700577 # does not exist, fall back to run the job without using server-side
578 # packaging. If option warn_no_ssp is specified, that means autoserv is
579 # running in a drone does not support SSP, thus no need to stage server-side
580 # package.
Dan Shicf4d2032015-03-12 15:04:21 -0700581 ssp_url = None
Dan Shi0b754c52015-04-20 14:20:38 -0700582 ssp_url_warning = False
Dan Shic68fefb2015-04-07 10:10:52 -0700583 if (not parser.options.warn_no_ssp and parser.options.require_ssp):
Dan Shicf4d2032015-03-12 15:04:21 -0700584 ssp_url = _stage_ssp(parser)
Dan Shi0b754c52015-04-20 14:20:38 -0700585 # The build does not have autotest server package. Fall back to not
586 # to use server-side package. Logging is postponed until logging being
587 # set up.
588 ssp_url_warning = not ssp_url
Dan Shicf4d2032015-03-12 15:04:21 -0700589
showard75cdfee2009-06-10 17:40:41 +0000590 if parser.options.no_logging:
591 results = None
592 else:
593 results = parser.options.results
mbligh80e1eba2008-11-19 00:26:18 +0000594 if not results:
595 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
596 results = os.path.abspath(results)
showard566d3c02010-01-12 18:57:01 +0000597 resultdir_exists = False
598 for filename in ('control.srv', 'status.log', '.autoserv_execute'):
599 if os.path.exists(os.path.join(results, filename)):
600 resultdir_exists = True
mbligh4608b002010-01-05 18:22:35 +0000601 if not parser.options.use_existing_results and resultdir_exists:
mbligh80e1eba2008-11-19 00:26:18 +0000602 error = "Error: results directory already exists: %s\n" % results
603 sys.stderr.write(error)
604 sys.exit(1)
mbligha788dc42009-03-26 21:10:16 +0000605
606 # Now that we certified that there's no leftover results dir from
607 # previous jobs, lets create the result dir since the logging system
608 # needs to create the log file in there.
609 if not os.path.isdir(results):
610 os.makedirs(results)
showard75cdfee2009-06-10 17:40:41 +0000611
Dan Shic68fefb2015-04-07 10:10:52 -0700612 # Server-side packaging will only be used if it's required and the package
613 # is available. If warn_no_ssp is specified, it means that autoserv is
614 # running in a drone does not have SSP supported and a warning will be logs.
615 # Therefore, it should not run with SSP.
616 use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
617 and ssp_url)
618 if use_ssp:
Dan Shie28de552015-05-06 16:51:58 -0700619 log_dir = os.path.join(results, 'ssp_logs') if results else None
Dan Shicf4d2032015-03-12 15:04:21 -0700620 if log_dir and not os.path.exists(log_dir):
621 os.makedirs(log_dir)
622 else:
623 log_dir = results
Dan Shi3f1b8a52015-04-21 11:11:06 -0700624
showard75cdfee2009-06-10 17:40:41 +0000625 logging_manager.configure_logging(
Dan Shicf4d2032015-03-12 15:04:21 -0700626 server_logging_config.ServerLoggingConfig(),
627 results_dir=log_dir,
showard10d84172009-06-18 23:16:50 +0000628 use_console=not parser.options.no_tee,
629 verbose=parser.options.verbose,
630 no_console_prefix=parser.options.no_console_prefix)
Dan Shicf4d2032015-03-12 15:04:21 -0700631
Dan Shi0b754c52015-04-20 14:20:38 -0700632 if ssp_url_warning:
633 logging.warn(
634 'Autoserv is required to run with server-side packaging. '
635 'However, no server-side package can be found based on '
636 '`--image`, host attribute job_repo_url or host label of '
637 'cros-version. The test will be executed without '
638 'server-side packaging supported.')
639
showard75cdfee2009-06-10 17:40:41 +0000640 if results:
mbligha788dc42009-03-26 21:10:16 +0000641 logging.info("Results placed in %s" % results)
mbligh10717632008-11-19 00:21:57 +0000642
mbligh4608b002010-01-05 18:22:35 +0000643 # wait until now to perform this check, so it get properly logged
Dan Shicf4d2032015-03-12 15:04:21 -0700644 if (parser.options.use_existing_results and not resultdir_exists and
Dan Shiff78f112015-06-12 13:34:02 -0700645 not utils.is_in_container()):
mbligh4608b002010-01-05 18:22:35 +0000646 logging.error("No existing results directory found: %s", results)
647 sys.exit(1)
648
Dan Shicf4d2032015-03-12 15:04:21 -0700649 logging.debug('autoserv is running in drone %s.', socket.gethostname())
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700650 logging.debug('autoserv command was: %s', ' '.join(sys.argv))
mbligh4608b002010-01-05 18:22:35 +0000651
Dan Shicf4d2032015-03-12 15:04:21 -0700652 if parser.options.write_pidfile and results:
mbligh4608b002010-01-05 18:22:35 +0000653 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
654 results)
jadmanskid5ab8c52008-12-03 16:27:07 +0000655 pid_file_manager.open_file()
mblighff7d61f2008-12-22 14:53:35 +0000656 else:
657 pid_file_manager = None
mbligha46678d2008-05-01 20:00:01 +0000658
jadmanskif22fea82008-11-26 20:57:07 +0000659 autotest.BaseAutotest.set_install_in_tmpdir(
660 parser.options.install_in_tmpdir)
661
Dan Shia1ecd5c2013-06-06 11:21:31 -0700662 timer = None
663 try:
664 # Take the first argument as control file name, get the test name from
665 # the control file. If the test name exists in the list of tests with
666 # run time measurement enabled, start a timer to begin measurement.
667 if (len(parser.args) > 0 and parser.args[0] != '' and
668 parser.options.machines):
Dan Shibbc16132013-07-09 16:23:59 -0700669 try:
670 test_name = control_data.parse_control(parser.args[0],
671 raise_warnings=True).name
672 except control_data.ControlVariableException:
673 logging.debug('Failed to retrieve test name from control file.')
674 test_name = None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700675 if test_name in measure_run_time_tests:
676 machines = parser.options.machines.replace(',', ' '
677 ).strip().split()
Dan Shi8eac5af2014-09-17 00:15:15 -0700678 try:
679 afe = frontend.AFE()
680 board = server_utils.get_board_from_afe(machines[0], afe)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800681 timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
682 (board, test_name))
Dan Shi8eac5af2014-09-17 00:15:15 -0700683 timer.start()
684 except (urllib2.HTTPError, urllib2.URLError):
685 # Ignore error if RPC failed to get board
686 pass
Dan Shia1ecd5c2013-06-06 11:21:31 -0700687 except control_data.ControlVariableException as e:
688 logging.error(str(e))
jadmanski0afbb632008-06-06 21:10:57 +0000689 exit_code = 0
Prashanth B6285f6a2014-05-08 18:01:27 -0700690 # TODO(beeps): Extend this to cover different failure modes.
691 # Testing exceptions are matched against labels sent to autoserv. Eg,
692 # to allow only the hostless job to run, specify
693 # testing_exceptions: test_suite in the shadow_config. To allow both
694 # the hostless job and dummy_Pass to run, specify
695 # testing_exceptions: test_suite,dummy_Pass. You can figure out
696 # what label autoserv is invoked with by looking through the logs of a test
697 # for the autoserv command's -l option.
Dan Shia06f3e22015-09-03 16:15:15 -0700698 testing_exceptions = _CONFIG.get_config_value(
Prashanth B6285f6a2014-05-08 18:01:27 -0700699 'AUTOSERV', 'testing_exceptions', type=list, default=[])
Dan Shia06f3e22015-09-03 16:15:15 -0700700 test_mode = _CONFIG.get_config_value(
Prashanth B6285f6a2014-05-08 18:01:27 -0700701 'AUTOSERV', 'testing_mode', type=bool, default=False)
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800702 test_mode = (results_mocker and test_mode and not
703 any([ex in parser.options.label
704 for ex in testing_exceptions]))
705 is_task = (parser.options.verify or parser.options.repair or
706 parser.options.provision or parser.options.reset or
707 parser.options.cleanup or parser.options.collect_crashinfo)
jadmanski0afbb632008-06-06 21:10:57 +0000708 try:
709 try:
Prashanth B6285f6a2014-05-08 18:01:27 -0700710 if test_mode:
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800711 # The parser doesn't run on tasks anyway, so we can just return
712 # happy signals without faking results.
713 if not is_task:
714 machine = parser.options.results.split('/')[-1]
715
716 # TODO(beeps): The proper way to do this would be to
717 # refactor job creation so we can invoke job.record
718 # directly. To do that one needs to pipe the test_name
719 # through run_autoserv and bail just before invoking
720 # the server job. See the comment in
721 # puppylab/results_mocker for more context.
722 results_mocker.ResultsMocker(
Prashanth Balasubramanian22dd2262014-11-28 18:19:18 -0800723 test_name if test_name else 'unknown-test',
724 parser.options.results, machine
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800725 ).mock_results()
726 return
Prashanth B6285f6a2014-05-08 18:01:27 -0700727 else:
Dan Shic68fefb2015-04-07 10:10:52 -0700728 run_autoserv(pid_file_manager, results, parser, ssp_url,
729 use_ssp)
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700730 except SystemExit as e:
jadmanski0afbb632008-06-06 21:10:57 +0000731 exit_code = e.code
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700732 if exit_code:
733 logging.exception(e)
734 except Exception as e:
jadmanski0afbb632008-06-06 21:10:57 +0000735 # If we don't know what happened, we'll classify it as
736 # an 'abort' and return 1.
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700737 logging.exception(e)
jadmanski0afbb632008-06-06 21:10:57 +0000738 exit_code = 1
739 finally:
mblighff7d61f2008-12-22 14:53:35 +0000740 if pid_file_manager:
741 pid_file_manager.close_file(exit_code)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700742 if timer:
743 timer.stop()
Fang Deng042c1472014-10-23 13:56:41 -0700744 # Record the autoserv duration time. Must be called
745 # just before the system exits to ensure accuracy.
746 duration_secs = (datetime.datetime.now() - start_time).total_seconds()
747 record_autoserv(parser.options, duration_secs)
jadmanski0afbb632008-06-06 21:10:57 +0000748 sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000749
mblighbb421852008-03-11 22:36:16 +0000750
# Script entry point: invoke main() only when this file is executed directly
# as a program, not when it is imported as a module.
if __name__ == '__main__':
    main()