blob: 288b840b2fd42367ba55c756d3bb3b2dda45d406 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
25from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070026from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070027from autotest_lib.client.common_lib.cros.graphite import autotest_es
28from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080029try:
30 from autotest_lib.puppylab import results_mocker
31except ImportError:
32 results_mocker = None
33
# NOTE(review): `require_atfork` is read here with default=True, but nothing
# in the visible part of this file references it; the ImportError branch below
# re-reads the same option with default=False. Confirm which default is
# intended before consolidating the two lookups.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # atfork is optional unless the config explicitly requires it. The
    # global_config module is already imported at the top of the file, so it
    # is not imported again here.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Send both the hint and the import error to stderr so they stay
        # together when stdout is redirected.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000057
Dan Shia1ecd5c2013-06-06 11:21:31 -070058from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000059from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000060from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070061from autotest_lib.server import utils as server_utils
Dan Shicf4d2032015-03-12 15:04:21 -070062from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070063from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070064from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070065from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000066from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080067from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000068
# Path of the control segment that stages the server-side package. It is
# executed by _stage_ssp(), which reads the resulting `ssp_url` variable.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')
72
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Signal number passed in by the signal module (unused).
    @param frame: Interrupted stack frame passed in by the signal module
                  (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits, so the message must say so. The previous text
    # claimed the alarm was ignored, which misled anyone reading the logs.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080076
Dan Shicf4d2032015-03-12 15:04:21 -070077
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    When a machines file is given it takes precedence: the names are read
    from the file (one per line, anything after `#` is a comment) and any
    value passed with -m is discarded.

    @param parser: Parser for the command line arguments.

    @return: A sorted list of unique machine names from command line arg -m
             or the machines file specified in the command line arg -M.
             An empty list if neither is given.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(machines_file, 'r') as machines_fd:
            for line in machines_fd:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            # Names read from a file may still contain inner whitespace.
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        machines = list(set(machines))
        machines.sort()
    return machines
108
109
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv invocation.

    The staging itself is delegated to the control segment at
    STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, which is executed with the job's
    machines and image. How staging is done may differ per host type;
    currently only CrosHost defines stage_server_side_package.
    The segment yields None when no server-side package is available, but it
    may raise if staging failed for a reason other than the artifact (the
    server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
                   process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    machines = _get_machines(parser)
    options = parser.options
    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    image = options.test_source_build or options.image

    segment_globals = {'machines': machines, 'image': image}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    # The control segment is expected to leave its result in `ssp_url`.
    return segment_locals['ssp_url']
134
135
def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    its paths point inside the container, runs that command in the container
    and finally reports the outcome and destroys the container.

    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to
                                            be stored in a temp folder.
                    results                 can be None for autoserv run
                                            requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
                                       control=control)
    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the flag
        # is removed its value sits at the same index; guard against the flag
        # having been the last argument.
        if index < len(args):
            del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        container_parse_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
        # Use the value computed in this branch; container_result_dir is only
        # defined when --results was given, so referencing it here raised
        # NameError for runs that set parse_job without a results directory.
        paths_to_replace[parser.options.parse_job] = container_parse_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is aready created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if not '--use-existing-results' in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if not '--pidfile-label' in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Always tear the container down, even if reporting the outcome
            # failed, so a stats/metadata error cannot leak a container.
            test_container.destroy()
212
213
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the user running autoserv.

    Tests run with server-side packaging leave the results folder owned by
    root. Owner and group are reset, recursively, to the uid/gid of the
    current process so the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    """
    if results:
        # Owner first, then group; each id is looked up right before its
        # command runs.
        for command_fmt, current_id in (
                ('sudo -n chown -R %s "%s"', os.getuid),
                ('sudo -n chgrp -R %s "%s"', os.getgid)):
            utils.run(command_fmt % (current_id(), results))
Dan Shi3f1b8a52015-04-21 11:11:06 -0700231
232
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, moves the process into its own process group, installs
    the signal handlers, builds the server job from the parsed options and
    runs either the requested special task (repair/verify/provision/reset/
    cleanup) or the control file. The function does not return normally: it
    ends by calling sys.exit() with 0 on success or 1 if running the job
    raised.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                # sys.exc_info() is a 3-tuple; wrap it so it is formatted as
                # one value. Passing the bare tuple to `%` raised TypeError
                # here, which aborted the handler before the process group
                # was killed.
                metadata['error'] = 'Exception: %s' % (sys.exc_info(),)
                # Handle any exception so the autoserv process can be aborted.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill every process in the group created by os.setpgrp() above,
        # including this one.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not avaliable to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        # Registered after handle_sigterm with chain=True, so the handler
        # installed above is still expected to run after the traceback dump.
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse_job value when no explicit tag was given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            # Special tasks are checked in a fixed order; only the first one
            # that is set runs.
            if repair:
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if use_ssp:
                    try:
                        _run_with_ssp(container_name, job_or_task_id, results,
                                      parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host the job opened, whether or not it succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately broad: any failure while running the job is reported
        # as exit code 1 after printing the traceback.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000457
458
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. With no machine
    at all the duration is recorded against the placeholder host 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = options.machines.replace(
            ',', ' ').strip().split() if options.machines else []
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Read the task flags straight off the options object. The previous
    # round trip through ast.literal_eval(str(options)) depended on the
    # object's __str__ producing a dict literal and failed as soon as any
    # option value had a non-literal repr. A missing attribute is treated the
    # same as a missing dict key was: not set.
    # The comparison to True is kept as-is so only an explicit True (not any
    # truthy value) selects a task.
    match = [task for task in task_mapping
             if getattr(options, task, None) == True]
    status = task_mapping[match[0]] if match else s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
490
491
mbligha46678d2008-05-01 20:00:01 +0000492def main():
Fang Deng042c1472014-10-23 13:56:41 -0700493 start_time = datetime.datetime.now()
Dan Shia1ecd5c2013-06-06 11:21:31 -0700494 # White list of tests with run time measurement enabled.
495 measure_run_time_tests_names = global_config.global_config.get_config_value(
496 'AUTOSERV', 'measure_run_time_tests', type=str)
497 if measure_run_time_tests_names:
498 measure_run_time_tests = [t.strip() for t in
499 measure_run_time_tests_names.split(',')]
500 else:
501 measure_run_time_tests = []
jadmanski0afbb632008-06-06 21:10:57 +0000502 # grab the parser
503 parser = autoserv_parser.autoserv_parser
mbligha5cb4062009-02-17 15:53:39 +0000504 parser.parse_args()
mbligha46678d2008-05-01 20:00:01 +0000505
jadmanski0afbb632008-06-06 21:10:57 +0000506 if len(sys.argv) == 1:
507 parser.parser.print_help()
508 sys.exit(1)
mbligha6f13082008-06-05 23:53:46 +0000509
Dan Shicf4d2032015-03-12 15:04:21 -0700510 # If the job requires to run with server-side package, try to stage server-
511 # side package first. If that fails with error that autotest server package
Dan Shic68fefb2015-04-07 10:10:52 -0700512 # does not exist, fall back to run the job without using server-side
513 # packaging. If option warn_no_ssp is specified, that means autoserv is
514 # running in a drone does not support SSP, thus no need to stage server-side
515 # package.
Dan Shicf4d2032015-03-12 15:04:21 -0700516 ssp_url = None
Dan Shi0b754c52015-04-20 14:20:38 -0700517 ssp_url_warning = False
Dan Shic68fefb2015-04-07 10:10:52 -0700518 if (not parser.options.warn_no_ssp and parser.options.require_ssp):
Dan Shicf4d2032015-03-12 15:04:21 -0700519 ssp_url = _stage_ssp(parser)
Dan Shi0b754c52015-04-20 14:20:38 -0700520 # The build does not have autotest server package. Fall back to not
521 # to use server-side package. Logging is postponed until logging being
522 # set up.
523 ssp_url_warning = not ssp_url
Dan Shicf4d2032015-03-12 15:04:21 -0700524
showard75cdfee2009-06-10 17:40:41 +0000525 if parser.options.no_logging:
526 results = None
527 else:
528 results = parser.options.results
mbligh80e1eba2008-11-19 00:26:18 +0000529 if not results:
530 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
531 results = os.path.abspath(results)
showard566d3c02010-01-12 18:57:01 +0000532 resultdir_exists = False
533 for filename in ('control.srv', 'status.log', '.autoserv_execute'):
534 if os.path.exists(os.path.join(results, filename)):
535 resultdir_exists = True
mbligh4608b002010-01-05 18:22:35 +0000536 if not parser.options.use_existing_results and resultdir_exists:
mbligh80e1eba2008-11-19 00:26:18 +0000537 error = "Error: results directory already exists: %s\n" % results
538 sys.stderr.write(error)
539 sys.exit(1)
mbligha788dc42009-03-26 21:10:16 +0000540
541 # Now that we certified that there's no leftover results dir from
542 # previous jobs, lets create the result dir since the logging system
543 # needs to create the log file in there.
544 if not os.path.isdir(results):
545 os.makedirs(results)
showard75cdfee2009-06-10 17:40:41 +0000546
Dan Shic68fefb2015-04-07 10:10:52 -0700547 # Server-side packaging will only be used if it's required and the package
548 # is available. If warn_no_ssp is specified, it means that autoserv is
549 # running in a drone does not have SSP supported and a warning will be logs.
550 # Therefore, it should not run with SSP.
551 use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
552 and ssp_url)
553 if use_ssp:
Dan Shie28de552015-05-06 16:51:58 -0700554 log_dir = os.path.join(results, 'ssp_logs') if results else None
Dan Shicf4d2032015-03-12 15:04:21 -0700555 if log_dir and not os.path.exists(log_dir):
556 os.makedirs(log_dir)
557 else:
558 log_dir = results
Dan Shi3f1b8a52015-04-21 11:11:06 -0700559
showard75cdfee2009-06-10 17:40:41 +0000560 logging_manager.configure_logging(
Dan Shicf4d2032015-03-12 15:04:21 -0700561 server_logging_config.ServerLoggingConfig(),
562 results_dir=log_dir,
showard10d84172009-06-18 23:16:50 +0000563 use_console=not parser.options.no_tee,
564 verbose=parser.options.verbose,
565 no_console_prefix=parser.options.no_console_prefix)
Dan Shicf4d2032015-03-12 15:04:21 -0700566
Dan Shi0b754c52015-04-20 14:20:38 -0700567 if ssp_url_warning:
568 logging.warn(
569 'Autoserv is required to run with server-side packaging. '
570 'However, no server-side package can be found based on '
571 '`--image`, host attribute job_repo_url or host label of '
572 'cros-version. The test will be executed without '
573 'server-side packaging supported.')
574
showard75cdfee2009-06-10 17:40:41 +0000575 if results:
mbligha788dc42009-03-26 21:10:16 +0000576 logging.info("Results placed in %s" % results)
mbligh10717632008-11-19 00:21:57 +0000577
mbligh4608b002010-01-05 18:22:35 +0000578 # wait until now to perform this check, so it get properly logged
Dan Shicf4d2032015-03-12 15:04:21 -0700579 if (parser.options.use_existing_results and not resultdir_exists and
Dan Shiff78f112015-06-12 13:34:02 -0700580 not utils.is_in_container()):
mbligh4608b002010-01-05 18:22:35 +0000581 logging.error("No existing results directory found: %s", results)
582 sys.exit(1)
583
Dan Shicf4d2032015-03-12 15:04:21 -0700584 logging.debug('autoserv is running in drone %s.', socket.gethostname())
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700585 logging.debug('autoserv command was: %s', ' '.join(sys.argv))
mbligh4608b002010-01-05 18:22:35 +0000586
Dan Shicf4d2032015-03-12 15:04:21 -0700587 if parser.options.write_pidfile and results:
mbligh4608b002010-01-05 18:22:35 +0000588 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
589 results)
jadmanskid5ab8c52008-12-03 16:27:07 +0000590 pid_file_manager.open_file()
mblighff7d61f2008-12-22 14:53:35 +0000591 else:
592 pid_file_manager = None
mbligha46678d2008-05-01 20:00:01 +0000593
jadmanskif22fea82008-11-26 20:57:07 +0000594 autotest.BaseAutotest.set_install_in_tmpdir(
595 parser.options.install_in_tmpdir)
596
Dan Shia1ecd5c2013-06-06 11:21:31 -0700597 timer = None
598 try:
599 # Take the first argument as control file name, get the test name from
600 # the control file. If the test name exists in the list of tests with
601 # run time measurement enabled, start a timer to begin measurement.
602 if (len(parser.args) > 0 and parser.args[0] != '' and
603 parser.options.machines):
Dan Shibbc16132013-07-09 16:23:59 -0700604 try:
605 test_name = control_data.parse_control(parser.args[0],
606 raise_warnings=True).name
607 except control_data.ControlVariableException:
608 logging.debug('Failed to retrieve test name from control file.')
609 test_name = None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700610 if test_name in measure_run_time_tests:
611 machines = parser.options.machines.replace(',', ' '
612 ).strip().split()
Dan Shi8eac5af2014-09-17 00:15:15 -0700613 try:
614 afe = frontend.AFE()
615 board = server_utils.get_board_from_afe(machines[0], afe)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800616 timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
617 (board, test_name))
Dan Shi8eac5af2014-09-17 00:15:15 -0700618 timer.start()
619 except (urllib2.HTTPError, urllib2.URLError):
620 # Ignore error if RPC failed to get board
621 pass
Dan Shia1ecd5c2013-06-06 11:21:31 -0700622 except control_data.ControlVariableException as e:
623 logging.error(str(e))
jadmanski0afbb632008-06-06 21:10:57 +0000624 exit_code = 0
Prashanth B6285f6a2014-05-08 18:01:27 -0700625 # TODO(beeps): Extend this to cover different failure modes.
626 # Testing exceptions are matched against labels sent to autoserv. Eg,
627 # to allow only the hostless job to run, specify
628 # testing_exceptions: test_suite in the shadow_config. To allow both
629 # the hostless job and dummy_Pass to run, specify
630 # testing_exceptions: test_suite,dummy_Pass. You can figure out
631 # what label autoserv is invoked with by looking through the logs of a test
632 # for the autoserv command's -l option.
633 testing_exceptions = global_config.global_config.get_config_value(
634 'AUTOSERV', 'testing_exceptions', type=list, default=[])
635 test_mode = global_config.global_config.get_config_value(
636 'AUTOSERV', 'testing_mode', type=bool, default=False)
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800637 test_mode = (results_mocker and test_mode and not
638 any([ex in parser.options.label
639 for ex in testing_exceptions]))
640 is_task = (parser.options.verify or parser.options.repair or
641 parser.options.provision or parser.options.reset or
642 parser.options.cleanup or parser.options.collect_crashinfo)
jadmanski0afbb632008-06-06 21:10:57 +0000643 try:
644 try:
Prashanth B6285f6a2014-05-08 18:01:27 -0700645 if test_mode:
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800646 # The parser doesn't run on tasks anyway, so we can just return
647 # happy signals without faking results.
648 if not is_task:
649 machine = parser.options.results.split('/')[-1]
650
651 # TODO(beeps): The proper way to do this would be to
652 # refactor job creation so we can invoke job.record
653 # directly. To do that one needs to pipe the test_name
654 # through run_autoserv and bail just before invoking
655 # the server job. See the comment in
656 # puppylab/results_mocker for more context.
657 results_mocker.ResultsMocker(
Prashanth Balasubramanian22dd2262014-11-28 18:19:18 -0800658 test_name if test_name else 'unknown-test',
659 parser.options.results, machine
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800660 ).mock_results()
661 return
Prashanth B6285f6a2014-05-08 18:01:27 -0700662 else:
Dan Shic68fefb2015-04-07 10:10:52 -0700663 run_autoserv(pid_file_manager, results, parser, ssp_url,
664 use_ssp)
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700665 except SystemExit as e:
jadmanski0afbb632008-06-06 21:10:57 +0000666 exit_code = e.code
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700667 if exit_code:
668 logging.exception(e)
669 except Exception as e:
jadmanski0afbb632008-06-06 21:10:57 +0000670 # If we don't know what happened, we'll classify it as
671 # an 'abort' and return 1.
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700672 logging.exception(e)
jadmanski0afbb632008-06-06 21:10:57 +0000673 exit_code = 1
674 finally:
mblighff7d61f2008-12-22 14:53:35 +0000675 if pid_file_manager:
676 pid_file_manager.close_file(exit_code)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700677 if timer:
678 timer.stop()
Fang Deng042c1472014-10-23 13:56:41 -0700679 # Record the autoserv duration time. Must be called
680 # just before the system exits to ensure accuracy.
681 duration_secs = (datetime.datetime.now() - start_time).total_seconds()
682 record_autoserv(parser.options, duration_secs)
jadmanski0afbb632008-06-06 21:10:57 +0000683 sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000684
mblighbb421852008-03-11 22:36:16 +0000685
# Script entry point: call main() only when this file is executed directly, not when it is imported as a module.
mbligha46678d2008-05-01 20:00:01 +0000686if __name__ == '__main__':
jadmanski0afbb632008-06-06 21:10:57 +0000687 main()