blob: 8be52c9a15f00c13b31b51dfcffc2710cd2aa6d1 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Fang Deng042c1472014-10-23 13:56:41 -07009import ast
10import datetime
11import getpass
12import logging
13import os
14import re
15import signal
Dan Shicf4d2032015-03-12 15:04:21 -070016import socket
Fang Deng042c1472014-10-23 13:56:41 -070017import sys
18import traceback
19import time
20import urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000021
mblighf5427bb2008-04-09 15:55:57 +000022import common
mbligh9ff89cd2009-09-03 20:28:17 +000023
Dan Shia1ecd5c2013-06-06 11:21:31 -070024from autotest_lib.client.common_lib import control_data
Dan Shi32649b82015-08-29 20:53:36 -070025from autotest_lib.client.common_lib import error
Dan Shia1ecd5c2013-06-06 11:21:31 -070026from autotest_lib.client.common_lib import global_config
Dan Shi5ddf6a32015-05-02 00:22:01 -070027from autotest_lib.client.common_lib import utils
Dan Shi37bee222015-04-13 15:46:47 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -080030try:
31 from autotest_lib.puppylab import results_mocker
32except ImportError:
33 results_mocker = None
34
# Whether the atfork module is required, as configured in the AUTOSERV section
# of the global config.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # NOTE(review): this lookup uses default=False while require_atfork above
    # uses default=True -- confirm which default is intended before merging
    # the two lookups.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Send both the hint and the import error to stderr so they are
        # reported together.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000058
Dan Shia1ecd5c2013-06-06 11:21:31 -070059from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000060from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000061from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070062from autotest_lib.server import utils as server_utils
Dan Shicf4d2032015-03-12 15:04:21 -070063from autotest_lib.site_utils import job_directories
Fang Deng042c1472014-10-23 13:56:41 -070064from autotest_lib.site_utils import job_overhead
Dan Shicf4d2032015-03-12 15:04:21 -070065from autotest_lib.site_utils import lxc
Dan Shi7836d252015-04-27 15:33:58 -070066from autotest_lib.site_utils import lxc_utils
showard75cdfee2009-06-10 17:40:41 +000067from autotest_lib.client.common_lib import pidfile, logging_manager
Gabe Black1e1c41b2015-02-04 23:55:15 -080068from autotest_lib.client.common_lib.cros.graphite import autotest_stats
mbligh92c0fc22008-11-20 16:52:23 +000069
# Control segment to stage server-side package. The path is resolved through
# server_job._control_segment_path and the file is executed by _stage_ssp().
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')
73
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Signal number delivered to the handler (unused).
    @param frame: Stack frame interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits, so the message must not claim the signal is ignored.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080077
Dan Shicf4d2032015-03-12 15:04:21 -070078
def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    Machines given with -m are split on commas and whitespace. When a
    machines file (-M) is given it replaces the -m list; each line of the file
    contributes one machine name after '#' comments and surrounding spaces are
    removed.

    @param parser: Parser for the command line arguments.

    @return: A sorted list of unique machine names from command line arg -m
             or the machines file specified in the command line arg -M.
             An empty list if neither yields any machine.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        # Use a context manager so the file handle is closed deterministically.
        with open(machines_file, 'r') as machines_fd:
            for line in machines_fd:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            # A name read from the file may still contain inner whitespace.
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        machines = sorted(set(machines))
    return machines
109
110
def _stage_ssp(parser):
    """Stage the server-side package for this autoserv run.

    The staging itself is delegated to the stage_server_side_package control
    segment, which is executed with the job's machines and image. How the
    package is staged may differ per host type; currently only CrosHost
    defines stage_server_side_package.
    The control segment yields None when no server-side package is available.
    It may still raise an exception when staging fails for a reason other
    than the artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: url of the staged server-side package, or None if no server-side
             package is found for the build.
    """
    machines = _get_machines(parser)
    # Server-side test code comes from test_source_build when it is given;
    # otherwise from the build specified in --image.
    build = parser.options.test_source_build or parser.options.image
    segment_globals = dict(machines=machines, image=build)
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url']
135
136
def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    Sets up a test container, rewrites the current autoserv command line so
    its paths point inside the container, runs that command in the container
    and finally reports the outcome and destroys the container.

    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
                                       control=control)
    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg. After the flag
        # is removed its value sits at the flag's former index.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        container_result_dir = lxc.RESULT_DIR_FMT % job_id
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        # Use the value computed here; container_result_dir is not defined
        # when --parse_job is given without a results directory.
        container_parse_dir = lxc.RESULT_DIR_FMT % job_id
        paths_to_replace[parser.options.parse_job] = container_parse_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even if reporting the outcome failed, so
            # no container is leaked.
            test_container.destroy()
Dan Shicf4d2032015-03-12 15:04:21 -0700212 test_container.destroy()
213
214
def correct_results_folder_permission(results):
    """Hand ownership of the results folder to the current user and group.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when this is
                    empty or None.

    @raises error.CmdError: If changing the owner or group fails. The failure
                            is posted to the metadata db before re-raising.
    """
    if results:
        try:
            # Owner first, then group; both recursively and without prompting
            # for a sudo password.
            utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
            utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
        except error.CmdError as cmd_error:
            failure_info = {'error': str(cmd_error),
                            'result_folder': results,
                            'drone': socket.gethostname()}
            autotest_es.post(use_http=True,
                             type_str='correct_results_folder_failure',
                             metadata=failure_info)
            raise
239 raise
Dan Shi3f1b8a52015-04-21 11:11:06 -0700240
241
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Redirects stdin, creates a separate process group, installs the signal
    handlers, builds the server job from the parsed command line options and
    runs the requested task. This function does not return normally; it ends
    by calling sys.exit() with the job's exit code.

    @param pid_file_manager: PidFileManager used to monitor the autoserv
                             process. May be None, in which case no pid file
                             is updated.
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Clean up and kill the whole process group on SIGTERM.

        @param signum: Signal number delivered to the handler (unused).
        @param frame: Stack frame interrupted by the signal (unused).
        """
        try:
            logging.debug('Received SIGTERM')
            if pid_file_manager:
                pid_file_manager.close_file(1, signal.SIGTERM)
            logging.debug('Finished writing to pid_file. Killing process.')

            # Update results folder's file permission. This needs to be done
            # ASAP before the parsing process tries to access the log.
            if use_ssp and results:
                correct_results_folder_permission(results)

            # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is
            # solved. This sleep allows the pending output to be logged before
            # the kill signal is sent.
            time.sleep(.1)
            if use_ssp:
                logging.debug('Destroy container %s before aborting the '
                              'autoserv process.', container_name)
                metadata = {'drone': socket.gethostname(),
                            'job_id': job_or_task_id,
                            'container_name': container_name,
                            'action': 'abort',
                            'success': True}
                try:
                    bucket = lxc.ContainerBucket()
                    container = bucket.get(container_name)
                    if container:
                        container.destroy()
                    else:
                        metadata['success'] = False
                        metadata['error'] = 'container not found'
                        logging.debug('Container %s is not found.',
                                      container_name)
                except:
                    metadata['success'] = False
                    metadata['error'] = 'Exception: %s' % sys.exc_info()
                    # Handle any exception so the autoserv process can be
                    # aborted.
                    logging.error('Failed to destroy container %s. Error: %s',
                                  container_name, sys.exc_info())
                autotest_es.post(use_http=True,
                                 type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                                 metadata=metadata)
                # Try to correct the result file permission again after the
                # container is destroyed, as the container might have created
                # some new files in the result folder.
                if results:
                    correct_results_folder_permission(results)
        except Exception:
            # A failed cleanup step must not prevent the abort below.
            logging.exception('Error while handling SIGTERM; aborting anyway.')
        finally:
            # Always kill the process group, whatever happened during cleanup.
            os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse_job value when no execution tag is given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if use_ssp:
                    try:
                        _run_with_ssp(container_name, job_or_task_id, results,
                                      parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host the job opened, whether or not the task failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Any failure is reported through the exit code after printing the
        # traceback.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000466
467
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded under the host name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = options.machines.replace(
            ',', ' ').strip().split() if options.machines else []
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported. The tasks are checked in a
    # fixed order (the same precedence run_autoserv uses to pick the task), so
    # the result is deterministic even if several task options are set.
    s = job_overhead.STATUS
    task_mapping = (
            ('repair', s.REPAIRING), ('verify', s.VERIFYING),
            ('provision', s.PROVISIONING), ('reset', s.RESETTING),
            ('cleanup', s.CLEANING), ('collect_crashinfo', s.GATHERING))
    status = s.RUNNING
    for task, task_status in task_mapping:
        # Only an option value equal to True selects the task; other truthy
        # values are intentionally not matched, as before.
        if getattr(options, task, False) == True:
            status = task_status
            break
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
498
499
mbligha46678d2008-05-01 20:00:01 +0000500def main():
Fang Deng042c1472014-10-23 13:56:41 -0700501 start_time = datetime.datetime.now()
Dan Shia1ecd5c2013-06-06 11:21:31 -0700502 # White list of tests with run time measurement enabled.
503 measure_run_time_tests_names = global_config.global_config.get_config_value(
504 'AUTOSERV', 'measure_run_time_tests', type=str)
505 if measure_run_time_tests_names:
506 measure_run_time_tests = [t.strip() for t in
507 measure_run_time_tests_names.split(',')]
508 else:
509 measure_run_time_tests = []
jadmanski0afbb632008-06-06 21:10:57 +0000510 # grab the parser
511 parser = autoserv_parser.autoserv_parser
mbligha5cb4062009-02-17 15:53:39 +0000512 parser.parse_args()
mbligha46678d2008-05-01 20:00:01 +0000513
jadmanski0afbb632008-06-06 21:10:57 +0000514 if len(sys.argv) == 1:
515 parser.parser.print_help()
516 sys.exit(1)
mbligha6f13082008-06-05 23:53:46 +0000517
Dan Shicf4d2032015-03-12 15:04:21 -0700518 # If the job requires to run with server-side package, try to stage server-
519 # side package first. If that fails with error that autotest server package
Dan Shic68fefb2015-04-07 10:10:52 -0700520 # does not exist, fall back to run the job without using server-side
521 # packaging. If option warn_no_ssp is specified, that means autoserv is
522 # running in a drone does not support SSP, thus no need to stage server-side
523 # package.
Dan Shicf4d2032015-03-12 15:04:21 -0700524 ssp_url = None
Dan Shi0b754c52015-04-20 14:20:38 -0700525 ssp_url_warning = False
Dan Shic68fefb2015-04-07 10:10:52 -0700526 if (not parser.options.warn_no_ssp and parser.options.require_ssp):
Dan Shicf4d2032015-03-12 15:04:21 -0700527 ssp_url = _stage_ssp(parser)
Dan Shi0b754c52015-04-20 14:20:38 -0700528 # The build does not have autotest server package. Fall back to not
529 # to use server-side package. Logging is postponed until logging being
530 # set up.
531 ssp_url_warning = not ssp_url
Dan Shicf4d2032015-03-12 15:04:21 -0700532
showard75cdfee2009-06-10 17:40:41 +0000533 if parser.options.no_logging:
534 results = None
535 else:
536 results = parser.options.results
mbligh80e1eba2008-11-19 00:26:18 +0000537 if not results:
538 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
539 results = os.path.abspath(results)
showard566d3c02010-01-12 18:57:01 +0000540 resultdir_exists = False
541 for filename in ('control.srv', 'status.log', '.autoserv_execute'):
542 if os.path.exists(os.path.join(results, filename)):
543 resultdir_exists = True
mbligh4608b002010-01-05 18:22:35 +0000544 if not parser.options.use_existing_results and resultdir_exists:
mbligh80e1eba2008-11-19 00:26:18 +0000545 error = "Error: results directory already exists: %s\n" % results
546 sys.stderr.write(error)
547 sys.exit(1)
mbligha788dc42009-03-26 21:10:16 +0000548
549 # Now that we certified that there's no leftover results dir from
550 # previous jobs, lets create the result dir since the logging system
551 # needs to create the log file in there.
552 if not os.path.isdir(results):
553 os.makedirs(results)
showard75cdfee2009-06-10 17:40:41 +0000554
Dan Shic68fefb2015-04-07 10:10:52 -0700555 # Server-side packaging will only be used if it's required and the package
556 # is available. If warn_no_ssp is specified, it means that autoserv is
557 # running in a drone does not have SSP supported and a warning will be logs.
558 # Therefore, it should not run with SSP.
559 use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
560 and ssp_url)
561 if use_ssp:
Dan Shie28de552015-05-06 16:51:58 -0700562 log_dir = os.path.join(results, 'ssp_logs') if results else None
Dan Shicf4d2032015-03-12 15:04:21 -0700563 if log_dir and not os.path.exists(log_dir):
564 os.makedirs(log_dir)
565 else:
566 log_dir = results
Dan Shi3f1b8a52015-04-21 11:11:06 -0700567
showard75cdfee2009-06-10 17:40:41 +0000568 logging_manager.configure_logging(
Dan Shicf4d2032015-03-12 15:04:21 -0700569 server_logging_config.ServerLoggingConfig(),
570 results_dir=log_dir,
showard10d84172009-06-18 23:16:50 +0000571 use_console=not parser.options.no_tee,
572 verbose=parser.options.verbose,
573 no_console_prefix=parser.options.no_console_prefix)
Dan Shicf4d2032015-03-12 15:04:21 -0700574
Dan Shi0b754c52015-04-20 14:20:38 -0700575 if ssp_url_warning:
576 logging.warn(
577 'Autoserv is required to run with server-side packaging. '
578 'However, no server-side package can be found based on '
579 '`--image`, host attribute job_repo_url or host label of '
580 'cros-version. The test will be executed without '
581 'server-side packaging supported.')
582
showard75cdfee2009-06-10 17:40:41 +0000583 if results:
mbligha788dc42009-03-26 21:10:16 +0000584 logging.info("Results placed in %s" % results)
mbligh10717632008-11-19 00:21:57 +0000585
mbligh4608b002010-01-05 18:22:35 +0000586 # wait until now to perform this check, so it get properly logged
Dan Shicf4d2032015-03-12 15:04:21 -0700587 if (parser.options.use_existing_results and not resultdir_exists and
Dan Shiff78f112015-06-12 13:34:02 -0700588 not utils.is_in_container()):
mbligh4608b002010-01-05 18:22:35 +0000589 logging.error("No existing results directory found: %s", results)
590 sys.exit(1)
591
Dan Shicf4d2032015-03-12 15:04:21 -0700592 logging.debug('autoserv is running in drone %s.', socket.gethostname())
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700593 logging.debug('autoserv command was: %s', ' '.join(sys.argv))
mbligh4608b002010-01-05 18:22:35 +0000594
Dan Shicf4d2032015-03-12 15:04:21 -0700595 if parser.options.write_pidfile and results:
mbligh4608b002010-01-05 18:22:35 +0000596 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
597 results)
jadmanskid5ab8c52008-12-03 16:27:07 +0000598 pid_file_manager.open_file()
mblighff7d61f2008-12-22 14:53:35 +0000599 else:
600 pid_file_manager = None
mbligha46678d2008-05-01 20:00:01 +0000601
jadmanskif22fea82008-11-26 20:57:07 +0000602 autotest.BaseAutotest.set_install_in_tmpdir(
603 parser.options.install_in_tmpdir)
604
Dan Shia1ecd5c2013-06-06 11:21:31 -0700605 timer = None
606 try:
607 # Take the first argument as control file name, get the test name from
608 # the control file. If the test name exists in the list of tests with
609 # run time measurement enabled, start a timer to begin measurement.
610 if (len(parser.args) > 0 and parser.args[0] != '' and
611 parser.options.machines):
Dan Shibbc16132013-07-09 16:23:59 -0700612 try:
613 test_name = control_data.parse_control(parser.args[0],
614 raise_warnings=True).name
615 except control_data.ControlVariableException:
616 logging.debug('Failed to retrieve test name from control file.')
617 test_name = None
Dan Shia1ecd5c2013-06-06 11:21:31 -0700618 if test_name in measure_run_time_tests:
619 machines = parser.options.machines.replace(',', ' '
620 ).strip().split()
Dan Shi8eac5af2014-09-17 00:15:15 -0700621 try:
622 afe = frontend.AFE()
623 board = server_utils.get_board_from_afe(machines[0], afe)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800624 timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
625 (board, test_name))
Dan Shi8eac5af2014-09-17 00:15:15 -0700626 timer.start()
627 except (urllib2.HTTPError, urllib2.URLError):
628 # Ignore error if RPC failed to get board
629 pass
Dan Shia1ecd5c2013-06-06 11:21:31 -0700630 except control_data.ControlVariableException as e:
631 logging.error(str(e))
jadmanski0afbb632008-06-06 21:10:57 +0000632 exit_code = 0
Prashanth B6285f6a2014-05-08 18:01:27 -0700633 # TODO(beeps): Extend this to cover different failure modes.
634 # Testing exceptions are matched against labels sent to autoserv. Eg,
635 # to allow only the hostless job to run, specify
636 # testing_exceptions: test_suite in the shadow_config. To allow both
637 # the hostless job and dummy_Pass to run, specify
638 # testing_exceptions: test_suite,dummy_Pass. You can figure out
639 # what label autoserv is invoked with by looking through the logs of a test
640 # for the autoserv command's -l option.
641 testing_exceptions = global_config.global_config.get_config_value(
642 'AUTOSERV', 'testing_exceptions', type=list, default=[])
643 test_mode = global_config.global_config.get_config_value(
644 'AUTOSERV', 'testing_mode', type=bool, default=False)
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800645 test_mode = (results_mocker and test_mode and not
646 any([ex in parser.options.label
647 for ex in testing_exceptions]))
648 is_task = (parser.options.verify or parser.options.repair or
649 parser.options.provision or parser.options.reset or
650 parser.options.cleanup or parser.options.collect_crashinfo)
jadmanski0afbb632008-06-06 21:10:57 +0000651 try:
652 try:
Prashanth B6285f6a2014-05-08 18:01:27 -0700653 if test_mode:
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800654 # The parser doesn't run on tasks anyway, so we can just return
655 # happy signals without faking results.
656 if not is_task:
657 machine = parser.options.results.split('/')[-1]
658
659 # TODO(beeps): The proper way to do this would be to
660 # refactor job creation so we can invoke job.record
661 # directly. To do that one needs to pipe the test_name
662 # through run_autoserv and bail just before invoking
663 # the server job. See the comment in
664 # puppylab/results_mocker for more context.
665 results_mocker.ResultsMocker(
Prashanth Balasubramanian22dd2262014-11-28 18:19:18 -0800666 test_name if test_name else 'unknown-test',
667 parser.options.results, machine
Prashanth Balasubramanianf8b83712014-11-06 15:58:21 -0800668 ).mock_results()
669 return
Prashanth B6285f6a2014-05-08 18:01:27 -0700670 else:
Dan Shic68fefb2015-04-07 10:10:52 -0700671 run_autoserv(pid_file_manager, results, parser, ssp_url,
672 use_ssp)
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700673 except SystemExit as e:
jadmanski0afbb632008-06-06 21:10:57 +0000674 exit_code = e.code
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700675 if exit_code:
676 logging.exception(e)
677 except Exception as e:
jadmanski0afbb632008-06-06 21:10:57 +0000678 # If we don't know what happened, we'll classify it as
679 # an 'abort' and return 1.
Aviv Keshet5c40ec62013-08-20 12:11:12 -0700680 logging.exception(e)
jadmanski0afbb632008-06-06 21:10:57 +0000681 exit_code = 1
682 finally:
mblighff7d61f2008-12-22 14:53:35 +0000683 if pid_file_manager:
684 pid_file_manager.close_file(exit_code)
Dan Shia1ecd5c2013-06-06 11:21:31 -0700685 if timer:
686 timer.stop()
Fang Deng042c1472014-10-23 13:56:41 -0700687 # Record the autoserv duration time. Must be called
688 # just before the system exits to ensure accuracy.
689 duration_secs = (datetime.datetime.now() - start_time).total_seconds()
690 record_autoserv(parser.options, duration_secs)
jadmanski0afbb632008-06-06 21:10:57 +0000691 sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000692
mblighbb421852008-03-11 22:36:16 +0000693
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
mbligha46678d2008-05-01 20:00:01 +0000694if __name__ == '__main__':
jadmanski0afbb632008-06-06 21:10:57 +0000695 main()