blob: c0fb4d6fba4addb7da196cf5d8db6fc63711b772 [file] [log] [blame]
mbligh6203ace2007-10-04 21:54:24 +00001#!/usr/bin/python -u
mbligh1ffd5dc2008-11-25 13:24:05 +00002# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
mbligh82648e52008-11-20 16:54:25 +00003# Released under the GPL v2
mblighdcd57a82007-07-11 23:06:47 +00004
mblighc8949b82007-07-23 16:33:58 +00005"""
Aviv Keshetde6bb192013-01-30 16:17:22 -08006Run a control file through the server side engine
mblighdcd57a82007-07-11 23:06:47 +00007"""
mbligh1ffd5dc2008-11-25 13:24:05 +00008
Dan Shi8eac5af2014-09-17 00:15:15 -07009import sys, os, re, traceback, signal, time, logging, getpass, urllib2
mbligh1ffd5dc2008-11-25 13:24:05 +000010
mblighf5427bb2008-04-09 15:55:57 +000011import common
mbligh9ff89cd2009-09-03 20:28:17 +000012
Dan Shia1ecd5c2013-06-06 11:21:31 -070013from autotest_lib.client.common_lib import control_data
14from autotest_lib.client.common_lib import global_config
# NOTE(review): require_atfork is not read anywhere else in the visible part of
# this file; the ImportError handler below looks the same option up again with
# default=False instead of default=True. Confirm which default is intended
# before consolidating the two lookups.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1


try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    from autotest_lib.client.common_lib import global_config
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Keep the hint and the underlying import error on the same stream so
        # that they show up together in captured output.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
mbligh9ff89cd2009-09-03 20:28:17 +000039
Dan Shia1ecd5c2013-06-06 11:21:31 -070040from autotest_lib.server import frontend
showard75cdfee2009-06-10 17:40:41 +000041from autotest_lib.server import server_logging_config
showard043c62a2009-06-10 19:48:57 +000042from autotest_lib.server import server_job, utils, autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070043from autotest_lib.server import utils as server_utils
showard75cdfee2009-06-10 17:40:41 +000044from autotest_lib.client.common_lib import pidfile, logging_manager
Michael Liangda8c60a2014-06-03 13:24:51 -070045from autotest_lib.client.common_lib.cros.graphite import stats
mbligh92c0fc22008-11-20 16:52:23 +000046
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate the process.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler exits, so the message must say so instead of claiming that
    # the alarm is ignored.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
Alex Millerf1af17e2013-01-09 22:50:32 -080050
def _read_machines_file(machines_file):
    """Return the machine names listed in a machines file.

    Everything from a '#' to the end of a line is dropped as a comment,
    surrounding whitespace is stripped, and lines left empty are skipped.

    @param machines_file: Path of the file to read.

    @returns A list of machine names in file order.
    """
    machines = []
    # The context manager closes the file even if reading fails.
    with open(machines_file, 'r') as f:
        for line in f:
            # remove comments, spaces
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """Prepare the process, build a server job from the options and run it.

    Redirects stdin to /dev/null, moves the process into its own process
    group, installs the signal handlers, validates the command line and then
    runs either the requested special task (repair, verify, provision, reset
    or cleanup) or the control file.

    @param pid_file_manager: Object whose close_file() is called with the exit
            code after the job (and with SIGTERM information from the SIGTERM
            handler), or a false value when no pidfile is written.
    @param results: Results location passed through to server_job.server_job.
    @param parser: Parsed autoserv parser; its options and args attributes
            select what is run and how.

    @raises SystemExit: After the job has been attempted, with code 0 if it
            ran without raising and 1 otherwise.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')
        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        pass

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Read every option up front so a missing attribute fails before any work
    # is started.
    options = parser.options
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = options.machines_file
    label = options.label
    group_name = options.group_name
    user = options.user
    client = options.client
    server = options.server
    install_before = options.install_before
    install_after = options.install_after
    verify = options.verify
    repair = options.repair
    cleanup = options.cleanup
    provision = options.provision
    reset = options.reset
    job_labels = options.job_labels
    parse_job = options.parse_job
    # Fall back to the parse job name when no execution tag was given.
    execution_tag = options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    host_protection = options.host_protection
    ssh_user = options.ssh_user
    ssh_port = options.ssh_port
    ssh_pass = options.ssh_pass
    collect_crashinfo = options.collect_crashinfo
    control_filename = options.control_filename
    test_retry = options.test_retry
    verify_job_repo_url = options.verify_job_repo_url
    skip_crash_collection = options.skip_crash_collection
    ssh_verbosity = int(options.ssh_verbosity)
    ssh_options = options.ssh_options

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    # A machines file replaces any machines given directly on the command line.
    if machines_file:
        machines = _read_machines_file(machines_file)
        # Single-argument print(...) prints the same text whether print is a
        # statement or a function.
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and order the machine list.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': options.disable_sysinfo}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                job.run(install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection,
                        job_labels=job_labels)
        finally:
            # Close every host the job still holds, whether or not it failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catches everything: any failure becomes exit code 1 so
        # that the pidfile is still closed and the parser cleaned up below.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
mbligha46678d2008-05-01 20:00:01 +0000223
224
def main():
    """Parse the command line, set up results and logging, then run autoserv.

    Unless logging is disabled, resolves and creates the results directory and
    refuses to reuse one that already holds job files when
    use_existing_results is not set. Optionally opens a pidfile and starts a
    run-time timer for tests listed in the AUTOSERV/measure_run_time_tests
    config value. Finally it either sleeps briefly (testing mode) or calls
    run_autoserv().

    @raises SystemExit: Always. The code is 1 for usage or results directory
            errors and for unexpected exceptions; otherwise it is the code
            that run_autoserv() exited with, or 0 in testing mode.
    """
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = global_config.global_config.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as holding a previous job.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)
    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if parser.options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(parser.args[0],
                                                       raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(
                        ',', ' ').strip().split()
                # A machines option made up only of separators yields an empty
                # list; skip the timer rather than fail on machines[0].
                if machines:
                    try:
                        afe = frontend.AFE()
                        board = server_utils.get_board_from_afe(machines[0],
                                                                afe)
                        timer = stats.Timer('autoserv_run_time.%s.%s' %
                                            (board, test_name))
                        timer.start()
                    except (urllib2.HTTPError, urllib2.URLError):
                        # Ignore error if RPC failed to get board
                        pass
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Treat a missing label as empty so the substring test cannot raise.
    label = parser.options.label or ''
    test_mode = test_mode and not any(ex in label
                                      for ex in testing_exceptions)
    try:
        try:
            if test_mode:
                time.sleep(TESTING_MODE_SLEEP_SECS)
            else:
                run_autoserv(pid_file_manager, results, parser)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)
mblighfaf0cd42007-11-19 16:00:24 +0000352
mblighbb421852008-03-11 22:36:16 +0000353
# Run autoserv only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()