Blame - site_utils/mass_command.py - platform/external/autotest

blob: a20a25d7c79768a108301aa3079de1b4bf0dbf64 [file] [log] [blame]

Scott Zawalski	20a9b58	2011-11-21 11:49:40 -0800	[diff] [blame]	1	#!/usr/bin/python
				2	#
				3	# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
				4	# Use of this source code is governed by a BSD-style license that can be
				5	# found in the LICENSE file.
				6
				7	"""Executes on all unlocked hosts in Autotest lab in parallel at a given rate.
				8
				9	Used to run a command or script on all hosts, or only those of a given platform,
				10	in the Autotest lab. Allows a configurable number of commands to be started in
				11	parallel.
				12	"""
				13
				14
				15	import datetime
				16	import logging
				17	import optparse
				18	import os
				19	import time
				20
				21	import chromeos_test_common
				22	from chromeos_test import autotest_util
				23	from chromeos_test import common_util
				24	from chromeos_test import mp_log_util
				25	from chromeos_test import mp_thread_pool as tp
				26
				27	# Default number of hosts to run command/script in parallel.
				28	DEFAULT_CONCURRENCY = 64
				29
				30	# Default number of hosts to update in parallel.
				31	DEFAULT_UPDATE_CONCURRENCY = 24
				32
				33	# Default location of ChromeOS checkout.
				34	DEFAULT_GCLIENT_ROOT = '/usr/local/google/home/${USER}/chromeos'
				35
				36	# Default path for individual host logs. Each host will have its own file. E.g.
				37	# <default_log_path>/<host>.log
				38	DEFAULT_LOG_PATH = ('/tmp/mass_command_logs/%s/'
				39	% time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
				40
				41	# Default root path on remote device to copy scripts to
				42	DEFAULT_REMOTE_COPY_PATH = '/tmp/'
				43
				44	# Number of seconds to wait before declaring a command/script has failed.
				45	DEFAULT_TIMEOUT = 120
				46
				47	# Amount of seconds to wait before declaring an update has failed.
				48	DEFAULT_UPDATE_TIMEOUT = 2400
				49
				50
				51	def ExecuteTask(failure_desc):
				52	"""Decorator for try/except/log pattern for reporting status and failures.
				53
				54	Args:
				55	failure_desc: Simple string description of task.
				56
				57	Returns:
				58	Decorator function to wrap a method call.
				59	"""
				60
				61	def DecoratorFunc(func):
				62	"""Function that takes the user called method as an argument."""
				63
				64	def WrappedFunc(self, *args):
				65	"""Function that wraps and executes user called method.
				66
				67	Args:
				68	self: Self object of the class method called by decorator.
				69	args: Arguments to user called method.
				70
				71	Returns:
				72	True/False if user called method succeeded.
				73	"""
				74	try:
				75	output = func(self, *args)
				76	if output:
				77	if self.output:
				78	self.output += '\n' + output
				79	else:
				80	self.output = output
				81	except common_util.ChromeOSTestError:
				82	if self.logger:
				83	self.logger.exception('Failed running %s %s.', self.host,
				84	failure_desc)
				85	self.result = failure_desc
				86	return False
				87	return True
				88
				89	return WrappedFunc
				90	return DecoratorFunc
				91
				92
				93	class HostWorker(object):
				94	"""Responsible for ssh-test, locking, executing, and unlocking a host."""
				95
				96	def __init__(self, host, options):
				97	"""Create instance to perform work on a host.
				98
				99	Args:
				100	host: IP address of the host to connect to.
				101	options: Command line options.
				102	"""
				103	self.host = host
				104	self.options = options
				105	self.result = None
				106	self.output = None
				107	self.logger = None
				108
				109	def Execute(self, logger=None):
				110	"""Callback method to execute the requested action on the host.
				111
				112	Usual sequence is to test connectivity by SSH-ing to the host, locking
				113	the host in Autotest, running the command, then unlocking the host.
				114
				115	Args:
				116	logger: optional logger.
				117
				118	Sets:
				119	self.result to 'PASS' or failure ['SSH', 'LOCK', 'COPY', 'CMD', 'URL'].
				120	self.output to standard out of command.
				121	"""
				122	try:
				123	if logger:
				124	# Store logger in self.logger so it is accessible in ExecuteTask.
				125	self.logger = logger
				126	logger.info('Executing for host %s', self.host)
				127
				128	if not self.options.skip_ssh:
				129	if not self.PingHost():
				130	return
				131
				132	if self.options.lock:
				133	if not self.LockUnlockHost(True):
				134	return
				135
				136	# Now that the host may be locked in Autotest the rest of the loop will
				137	# execute in a try/finally to make sure the host is still unlocked if
				138	# any of the remaining steps throw an exception.
				139	try:
				140	if self.options.url:
				141	if not self.ImageHost():
				142	return
				143	else:
				144	cmd = self.options.cmd
				145	if self.options.script:
				146	cmd = self.options.remote_file
				147	if not self.CopyToDevice():
				148	return
				149	if not self.SSHCmdOnHost(cmd, self.options.extra_args):
				150	return
				151	finally:
				152	if self.options.lock:
				153	self.LockUnlockHost(False)
				154
				155	self.result = 'PASS'
				156	self.ProcessResult()
				157
				158	finally:
				159	# Loggers hold a thread lock which cannot be pickled, so it must be
				160	# cleared before returning.
				161	self.logger = None
				162
				163	def ProcessResult(self):
				164	"""Dump the results to the screen and/or log file."""
				165	if self.logger:
				166	msg = [self.host, ' finished with ', self.result]
				167
				168	if self.options.echo_output:
				169	if self.output:
				170	msg += ['\nStdOut=[\n', self.output, '\n]']
				171	self.logger.info(''.join(msg))
				172
				173	if not self.options.no_log_files:
				174	log = open(os.path.join(self.options.log_path, self.host + '.log'), 'w')
				175	log.write(self.output)
				176	log.close()
				177
				178	@ExecuteTask('SSH')
				179	def PingHost(self):
				180	"""Tests if the requested host is reachable over SSH."""
				181	msg = 'Failed to ssh to host=%s' % self.host
				182	return common_util.RemoteCommand(self.host, 'root', 'true', error_msg=msg,
				183	output=True)
				184
				185	@ExecuteTask('CMD')
				186	def SSHCmdOnHost(self, command, args=None):
				187	"""Executes a command on the target host using an SSH connection.
				188
				189	Args:
				190	command: Command to run.
				191	args: Extra arguments to main command to run on the remote host.
				192
				193	Returns:
				194	String output from the command.
				195	"""
				196	cmd = '"%s %s"' % (command, args)
				197	msg = 'Failed to run command=%s' % cmd
				198	return common_util.RemoteCommand(self.host, 'root', cmd, error_msg=msg,
				199	output=True)
				200
				201	@ExecuteTask('COPY')
				202	def CopyToDevice(self):
				203	"""Copies a file (usually a script file) to a host using scp.
				204
				205	Returns:
				206	String output from the command.
				207	"""
				208	msg = 'Failed to copy %s to root@%s:%s'% (self.options.script, self.host,
				209	self.options.remote_file)
				210	return common_util.RemoteCopy(self.host, 'root', self.options.script,
				211	self.options.remote_file, error_msg=msg,
				212	output=True)
				213
				214	@ExecuteTask('URL')
				215	def ImageHost(self):
				216	"""Uses the image_to_live script to update a host.
				217
				218	Returns:
				219	String output from the command.
				220	"""
				221	cmd = ('/usr/local/scripts/alarm %d %s/src/scripts/image_to_live.sh '
				222	'--update_url %s --remote %s' % (self.options.timeout,
				223	self.options.gclient,
				224	self.options.url, self.host))
				225	return common_util.RunCommand(cmd, output=True)
				226
				227	@ExecuteTask('LOCK')
				228	def LockUnlockHost(self, lock=True):
				229	"""Locks a host using the atest CLI.
				230
				231	Locking a host tells Autotest that the host shouldn't be scheduled for
				232	any other tasks. Returns true if the locking process was successful.
				233
				234	Args:
				235	lock: True=lock the host, False=unlock the host.
				236
				237	Returns:
				238	String output from the command.
				239	"""
				240	if lock:
				241	cmd = '%s host mod -l %s' % (self.options.cli, self.host)
				242	else:
				243	cmd = '%s host mod -u %s' % (self.options.cli, self.host)
				244	return common_util.RunCommand(cmd, output=True)
				245
				246
				247	class CommandManager(object):
				248	"""Executes a command on all of the selected remote hosts.
				249
				250	The hosts are selected from Autotest using the parameters supplied on the
				251	command line.
				252	"""
				253
				254	def __init__(self):
				255	self.options = self.ParseOptions()
				256	mp_log_util.InitializeLogging(**vars(self.options))
				257	if self.options.ip_addr:
				258	self.host_list = [self.options.ip_addr]
				259	else:
				260	self.host_list = autotest_util.GetHostList(self.options.cli,
				261	self.options.acl,
				262	self.options.label,
				263	self.options.user,
				264	self.options.status)
				265
				266	@staticmethod
				267	def ParseOptions():
				268	"""Grab the options from the command line."""
				269
				270	parser = optparse.OptionParser(
				271	'Used to run a command or script or update on all hosts, or only those '
				272	'of a given platform, in the Autotest lab. Allows a configurable '
				273	'number of commands to be started in parallel.\n\n'
				274	'\texample: %prog [options] command\n\n'
				275	'Arguments after command are interpreted as arguments to the command.\n'
				276	'\n\texample: %prog [options] command [cmd_arg_1] [cmd_arg_2]\n\n'
				277	'Multiple command can be run by enclosing them in quotation marks.\n\n'
				278	'\texample: %prog [options] "command1; command2; command2"\n\n'
				279	'When using the --script option, additional arguments are interpreted '
				280	'as script options and are passed to the script after being copied to '
				281	'the remote device.\n\n'
				282	'\texample: %prog [options] --script /path/to/script.sh '
				283	'[script_arg_1] [script_arg_2] [script_arg_3]\n\n'
				284	'When using the --url option specify the path to the new build. '
				285	'Additional arguments are ignored.\n\n'
				286	'\texample: %prog [options] --url /path/to/build')
				287
				288	# Args for describing the environment of the server machine
				289	group = optparse.OptionGroup(
				290	parser, 'Server Configuration', 'Options that specify the layout of '
				291	'the machine hosting this script.')
				292	group.add_option(
				293	'-g', '--gclient', default=DEFAULT_GCLIENT_ROOT,
				294	help=('Location of ChromeOS checkout. [default: %default]'))
				295	parser.add_option_group(group)
				296
				297	# Args for configuring logging.
				298	group = mp_log_util.AddOptions(parser)
				299	group.add_option(
				300	'--log_path', default=DEFAULT_LOG_PATH,
				301	help=('Where to put individual host log files. [default: %default]'))
				302	group.add_option(
				303	'-n', '--no_log_files', default=False, action='store_true',
				304	help=('Skip writing output to files, instead display results on the '
				305	'console window only. [default: %default]'))
				306	group.add_option(
				307	'-e', '--echo_output', default=False, action='store_true',
				308	help=('Write command output to console. [default: %default]'))
				309	parser.add_option_group(group)
				310
				311	# Args for selecting machines from Autotest
				312	group = autotest_util.AddOptions(parser)
				313	group.add_option(
				314	'-i', '--ip_addr',
				315	help=('IP address of single machine to run on.'))
				316	parser.add_option_group(group)
				317
				318	# Args for defining how to run tasks from the server
				319	group = optparse.OptionGroup(
				320	parser, 'Execution Options', 'Options that define how commands are '
				321	'run on the remote machines.')
				322	group.add_option(
				323	'-p', '--parallel', type='int', default=DEFAULT_CONCURRENCY,
				324	help=('Number of hosts to be run concurrently. '
				325	'[default: %default].'))
				326	group.add_option(
				327	'-t', '--timeout', type='int', default=DEFAULT_TIMEOUT,
				328	help=('Time to wait before killing the attempt to run command. '
				329	'[default: %default]'))
				330	group.add_option(
				331	'--skip_ssh', default=False, action='store_true',
				332	help=('Skip SSH check before running on each device. '
				333	'[default: %default]'))
				334	group.add_option(
				335	'-l', '--lock', default=False, action='store_true',
				336	help='Lock device in Autotest while running. [default: %default]')
				337	parser.add_option_group(group)
				338
				339	# Args for the action to take on each remote device
				340	group = optparse.OptionGroup(
				341	parser, 'Main Options', 'Options that define main action. Selecting '
				342	'neither --script nor --url defaults to running a command on the '
				343	'hosts.')
				344	group.add_option(
				345	'-s', '--script', nargs=2,
				346	help=('Path to script to copy to host then execute. 2 args are '
				347	'required. If the script does not take any args pass an empty '
				348	'string \" \"'))
				349	group.add_option(
				350	'--url',
				351	help=('Run image_to_live.sh with provided image URL. Note: Resets '
				352	'defaults for --lock=TRUE and --timeout=2400 and --parallel='
				353	'24.'))
				354	parser.add_option_group(group)
				355
				356	options, args = parser.parse_args()
				357
				358	options.cmd = None
				359	options.extra_args = None
				360	options.remote_file = None
				361
				362	# If script/url was not specified, the remaining args are commands.
				363	if not options.script and not options.url:
				364	if not args:
				365	parser.error('Either script, command, or URL must be selected.')
				366	else:
				367	options.cmd, options.extra_args = args[0], ' '.join(args[1:])
				368
				369	# Grab the arguments to the script and setup any extra args.
				370	if options.script:
				371	options.script, options.extra_args = options.script[0], options.script[1]
				372	options.remote_file = os.path.join(DEFAULT_REMOTE_COPY_PATH,
				373	options.script.split(os.path.sep)[-1])
				374	else:
				375	options.remote_file = ''
				376
				377	# For updates reset default lock and timeout.
				378	if options.url:
				379	# Only modify these options if they still have their default values. If
				380	# the user has already overwritten them keep the users values.
				381	if options.timeout == DEFAULT_TIMEOUT:
				382	options.timeout = DEFAULT_UPDATE_TIMEOUT
				383	if options.parallel == DEFAULT_CONCURRENCY:
				384	options.parallel = DEFAULT_UPDATE_CONCURRENCY
				385
				386	# Create log folder if it doesn't exist.
				387	if not options.no_log_files and not os.path.exists(options.log_path):
				388	os.makedirs(options.log_path)
				389
				390	return options
				391
				392
				393	def ProcessResults(results, result_type):
				394	"""Dump the results to the screen and/or log file.
				395
				396	Args:
				397	results: Hosts with the same result type.
				398	result_type: String description of the result type.
				399	"""
				400	msg = '%d hosts %s.\n' % (len(results), result_type)
				401	msg += ', '.join(results)
				402	mp_log_util.LogWithHeader(msg, width=80, symbol='-')
				403
				404
				405	def main():
				406	"""Run commands in parallel on remote hosts."""
				407	script_start_time = datetime.datetime.now()
				408	cm = CommandManager()
				409	if not cm.host_list:
				410	logging.error('No hosts found.')
				411	return
				412	logging.info('Found %d hosts.', len(cm.host_list))
				413
				414	# Create work object for each host retrieved.
				415	hosts = [HostWorker(host, cm.options) for host in cm.host_list]
				416
				417	# Submit work to pool.
				418	mp_tp = tp.MultiProcWorkPool(max_threads=cm.options.parallel)
				419	hosts = mp_tp.ExecuteWorkItems(
				420	hosts, provide_logger=True,
				421	logger_init_callback=mp_log_util.InitializeLogging, **vars(cm.options))
				422
				423	# Now that work is done, output results.
				424	status_strings = {'PASS': 'succeeded',
				425	'SSH': 'failed connecting via SSH',
				426	'LOCK': 'failed locking in Autotest',
				427	'COPY': 'failed copying script',
				428	'CMD': 'failed executing command',
				429	'URL': 'failed updating image'}
				430	results = {}
				431	for key in status_strings:
				432	results[key] = []
				433
				434	# Divide results by result type for prettier reporting.
				435	for h in hosts:
				436	results[h.result].append(h.host)
				437
				438	# Output final results.
				439	for result, hosts in results.items():
				440	if hosts:
				441	ProcessResults(hosts, status_strings[result])
				442
				443	if not cm.options.no_log_files:
				444	logging.info('Log files located in %s', cm.options.log_path)
				445
				446	# Follow up with some timing info.
				447	script_runtime = datetime.datetime.now() - script_start_time
				448	logging.info('Running Time = %d.%d seconds.',
				449	script_runtime.seconds, script_runtime.microseconds)
				450
				451
				452	if __name__ == '__main__':
				453	main()