#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Executes on all unlocked hosts in Autotest lab in parallel at a given rate.

Used to run a command or script on all hosts, or only those of a given platform,
in the Autotest lab. Allows a configurable number of commands to be started in
parallel.
"""
13
14
import datetime
import functools
import logging
import optparse
import os
import time

import chromeos_test_common
from chromeos_test import autotest_util
from chromeos_test import common_util
from chromeos_test import mp_log_util
from chromeos_test import mp_thread_pool as tp
26
# Default number of hosts to run command/script in parallel.
DEFAULT_CONCURRENCY = 64

# Default number of hosts to update in parallel.
DEFAULT_UPDATE_CONCURRENCY = 24

# Default location of ChromeOS checkout.
# NOTE(review): '${USER}' is not expanded by Python; presumably the shell
# expands it when this path is interpolated into a command line -- confirm.
DEFAULT_GCLIENT_ROOT = '/usr/local/google/home/${USER}/chromeos'

# Default path for individual host logs. Each host will have its own file. E.g.
# <default_log_path>/<host>.log
# The timestamp is computed once, when this module is loaded.
DEFAULT_LOG_PATH = ('/tmp/mass_command_logs/%s/'
                    % time.strftime('%Y-%m-%d-%H-%M', time.localtime()))

# Default root path on remote device to copy scripts to.
DEFAULT_REMOTE_COPY_PATH = '/tmp/'

# Amount of seconds to wait before declaring a command/script has failed.
DEFAULT_TIMEOUT = 120

# Amount of seconds to wait before declaring an update has failed.
DEFAULT_UPDATE_TIMEOUT = 2400
49
50
def ExecuteTask(failure_desc):
  """Decorator for try/except/log pattern for reporting status and failures.

  The decorated method must belong to an object exposing `host`, `logger`,
  `output` and `result` attributes. Any truthy value returned by the method is
  appended to `self.output` (newline separated). If the method raises
  common_util.ChromeOSTestError, the failure is logged (when a logger is set)
  and `self.result` is set to `failure_desc`. Any other exception propagates
  to the caller unchanged.

  Args:
    failure_desc: Simple string description of task.

  Returns:
    Decorator function to wrap a method call.
  """

  def DecoratorFunc(func):
    """Function that takes the user called method as an argument."""

    # functools.wraps keeps the wrapped method's name and docstring intact.
    @functools.wraps(func)
    def WrappedFunc(self, *args, **kwargs):
      # Runs the user called method and reports success as a boolean.
      #
      # Args:
      #   self: Self object of the class method called by decorator.
      #   args: Positional arguments to user called method.
      #   kwargs: Keyword arguments to user called method.
      #
      # Returns:
      #   True if the method completed, False if it raised ChromeOSTestError.
      try:
        output = func(self, *args, **kwargs)
      except common_util.ChromeOSTestError:
        if self.logger:
          self.logger.exception('Failed running %s %s.', self.host,
                                failure_desc)
        self.result = failure_desc
        return False

      # Accumulate output across tasks so the final log holds every step.
      if output:
        if self.output:
          self.output += '\n' + output
        else:
          self.output = output
      return True

    return WrappedFunc
  return DecoratorFunc
91
92
class HostWorker(object):
  """Responsible for ssh-test, locking, executing, and unlocking a host.

  Attributes:
    host: IP address of the host to connect to.
    options: Command line options.
    result: None until Execute() finishes a step; then 'PASS' or the failure
        description set by the ExecuteTask decorator.
    output: Accumulated output of the executed tasks, or None if no task
        produced any output.
    logger: Logger used while Execute() runs; reset to None afterwards.
  """

  def __init__(self, host, options):
    """Create instance to perform work on a host.

    Args:
      host: IP address of the host to connect to.
      options: Command line options.
    """
    self.host = host
    self.options = options
    self.result = None
    self.output = None
    self.logger = None

  def Execute(self, logger=None):
    """Callback method to execute the requested action on the host.

    Usual sequence is to test connectivity by SSH-ing to the host, locking
    the host in Autotest, running the command, then unlocking the host.

    Args:
      logger: optional logger.

    Sets:
      self.result to 'PASS' or failure ['SSH', 'LOCK', 'COPY', 'CMD', 'URL'].
      self.output to standard out of command.
    """
    try:
      if logger:
        # Store logger in self.logger so it is accessible in ExecuteTask.
        self.logger = logger
        logger.info('Executing for host %s', self.host)

      if not self.options.skip_ssh and not self.PingHost():
        return

      if self.options.lock and not self.LockUnlockHost(True):
        return

      # Now that the host may be locked in Autotest the rest of the steps
      # execute in a try/finally to make sure the host is still unlocked if
      # any of the remaining steps fail or throw an exception.
      try:
        if self.options.url:
          if not self.ImageHost():
            return
        else:
          cmd = self.options.cmd
          if self.options.script:
            # The script is executed from the location it was copied to.
            cmd = self.options.remote_file
            if not self.CopyToDevice():
              return
          if not self.SSHCmdOnHost(cmd, self.options.extra_args):
            return
      finally:
        if self.options.lock:
          # NOTE(review): a failed unlock sets self.result to 'LOCK' here, but
          # on the success path it is overwritten with 'PASS' just below.
          self.LockUnlockHost(False)

      self.result = 'PASS'
      self.ProcessResult()

    finally:
      # Loggers hold a thread lock which cannot be pickled, so it must be
      # cleared before returning.
      self.logger = None

  def ProcessResult(self):
    """Dump the results to the screen and/or log file."""
    if self.logger:
      msg = [self.host, ' finished with ', self.result]
      if self.options.echo_output and self.output:
        msg += ['\nStdOut=[\n', self.output, '\n]']
      self.logger.info(''.join(msg))

    if not self.options.no_log_files:
      log_file = os.path.join(self.options.log_path, self.host + '.log')
      # self.output is still None when no task produced output; write an empty
      # log instead of raising TypeError. The with-block closes the file even
      # if the write fails.
      with open(log_file, 'w') as log:
        log.write(self.output or '')

  @ExecuteTask('SSH')
  def PingHost(self):
    """Tests if the requested host is reachable over SSH.

    Runs the command 'true' on the host as root.

    Returns:
      Whatever common_util.RemoteCommand returns (presumably the command
      output). Callers see True/False because of the ExecuteTask decorator.
    """
    msg = 'Failed to ssh to host=%s' % self.host
    return common_util.RemoteCommand(self.host, 'root', 'true', error_msg=msg,
                                     output=True)

  @ExecuteTask('CMD')
  def SSHCmdOnHost(self, command, args=None):
    """Executes a command on the target host using an SSH connection.

    Args:
      command: Command to run.
      args: Extra arguments to main command to run on the remote host. None is
          treated like an empty string.

    Returns:
      String output from the command. Callers see True/False because of the
      ExecuteTask decorator.
    """
    # Without the fallback a None args would put the text "None" in the command.
    cmd = '"%s %s"' % (command, args or '')
    msg = 'Failed to run command=%s' % cmd
    return common_util.RemoteCommand(self.host, 'root', cmd, error_msg=msg,
                                     output=True)

  @ExecuteTask('COPY')
  def CopyToDevice(self):
    """Copies a file (usually a script file) to a host using scp.

    The source is self.options.script and the destination on the host is
    self.options.remote_file.

    Returns:
      String output from the command. Callers see True/False because of the
      ExecuteTask decorator.
    """
    msg = 'Failed to copy %s to root@%s:%s' % (self.options.script, self.host,
                                               self.options.remote_file)
    return common_util.RemoteCopy(self.host, 'root', self.options.script,
                                  self.options.remote_file, error_msg=msg,
                                  output=True)

  @ExecuteTask('URL')
  def ImageHost(self):
    """Uses the image_to_live script to update a host.

    The update is launched through /usr/local/scripts/alarm, which is given
    self.options.timeout as its first argument.

    Returns:
      String output from the command. Callers see True/False because of the
      ExecuteTask decorator.
    """
    cmd = ('/usr/local/scripts/alarm %d %s/src/scripts/image_to_live.sh '
           '--update_url %s --remote %s' % (self.options.timeout,
                                            self.options.gclient,
                                            self.options.url, self.host))
    return common_util.RunCommand(cmd, output=True)

  @ExecuteTask('LOCK')
  def LockUnlockHost(self, lock=True):
    """Locks or unlocks a host using the atest CLI.

    Locking a host tells Autotest that the host shouldn't be scheduled for
    any other tasks.

    Args:
      lock: True=lock the host, False=unlock the host.

    Returns:
      String output from the command. Callers see True/False because of the
      ExecuteTask decorator.
    """
    if lock:
      cmd = '%s host mod -l %s' % (self.options.cli, self.host)
    else:
      cmd = '%s host mod -u %s' % (self.options.cli, self.host)
    return common_util.RunCommand(cmd, output=True)
245
246
class CommandManager(object):
  """Parses the command line and resolves the hosts to operate on.

  The hosts are either the single machine given with --ip_addr or the list
  selected from Autotest using the parameters supplied on the command line.

  Attributes:
    options: Parsed (and post-processed) command line options.
    host_list: Hosts the requested action should run on.
  """

  def __init__(self):
    """Parse the options, initialize logging and build the host list."""
    opts = self.ParseOptions()
    self.options = opts
    mp_log_util.InitializeLogging(**vars(opts))
    if not opts.ip_addr:
      self.host_list = autotest_util.GetHostList(
          opts.cli, opts.acl, opts.label, opts.user, opts.status)
    else:
      # A single explicitly requested machine bypasses the Autotest query.
      self.host_list = [opts.ip_addr]

  @staticmethod
  def ParseOptions():
    """Grab the options from the command line.

    Besides the declared flags, the returned object carries three derived
    attributes: cmd (command to run, or None), extra_args (arguments for the
    command or script) and remote_file (remote script path, '' if unused).

    Returns:
      The optparse options object, post-processed as described above.
    """
    usage = (
        'Used to run a command or script or update on all hosts, or only those '
        'of a given platform, in the Autotest lab. Allows a configurable '
        'number of commands to be started in parallel.\n\n'
        '\texample: %prog [options] command\n\n'
        'Arguments after command are interpreted as arguments to the command.\n'
        '\n\texample: %prog [options] command [cmd_arg_1] [cmd_arg_2]\n\n'
        'Multiple command can be run by enclosing them in quotation marks.\n\n'
        '\texample: %prog [options] "command1; command2; command2"\n\n'
        'When using the --script option, additional arguments are interpreted '
        'as script options and are passed to the script after being copied to '
        'the remote device.\n\n'
        '\texample: %prog [options] --script /path/to/script.sh '
        '[script_arg_1] [script_arg_2] [script_arg_3]\n\n'
        'When using the --url option specify the path to the new build. '
        'Additional arguments are ignored.\n\n'
        '\texample: %prog [options] --url /path/to/build')
    parser = optparse.OptionParser(usage)

    # Environment of the server machine.
    server_group = optparse.OptionGroup(
        parser, 'Server Configuration', 'Options that specify the layout of '
        'the machine hosting this script.')
    server_group.add_option(
        '-g', '--gclient', default=DEFAULT_GCLIENT_ROOT,
        help=('Location of ChromeOS checkout. [default: %default]'))
    parser.add_option_group(server_group)

    # Logging configuration.
    log_group = mp_log_util.AddOptions(parser)
    log_group.add_option(
        '--log_path', default=DEFAULT_LOG_PATH,
        help=('Where to put individual host log files. [default: %default]'))
    log_group.add_option(
        '-n', '--no_log_files', default=False, action='store_true',
        help=('Skip writing output to files, instead display results on the '
              'console window only. [default: %default]'))
    log_group.add_option(
        '-e', '--echo_output', default=False, action='store_true',
        help=('Write command output to console. [default: %default]'))
    parser.add_option_group(log_group)

    # Machine selection from Autotest.
    host_group = autotest_util.AddOptions(parser)
    host_group.add_option(
        '-i', '--ip_addr',
        help=('IP address of single machine to run on.'))
    parser.add_option_group(host_group)

    # How tasks are run from the server.
    exec_group = optparse.OptionGroup(
        parser, 'Execution Options', 'Options that define how commands are '
        'run on the remote machines.')
    exec_group.add_option(
        '-p', '--parallel', type='int', default=DEFAULT_CONCURRENCY,
        help=('Number of hosts to be run concurrently. '
              '[default: %default].'))
    exec_group.add_option(
        '-t', '--timeout', type='int', default=DEFAULT_TIMEOUT,
        help=('Time to wait before killing the attempt to run command. '
              '[default: %default]'))
    exec_group.add_option(
        '--skip_ssh', default=False, action='store_true',
        help=('Skip SSH check before running on each device. '
              '[default: %default]'))
    exec_group.add_option(
        '-l', '--lock', default=False, action='store_true',
        help='Lock device in Autotest while running. [default: %default]')
    parser.add_option_group(exec_group)

    # Action to take on each remote device.
    # NOTE(review): the --url help promises that --lock defaults to TRUE, but
    # nothing below changes options.lock; only timeout and parallel are reset.
    action_group = optparse.OptionGroup(
        parser, 'Main Options', 'Options that define main action. Selecting '
        'neither --script nor --url defaults to running a command on the '
        'hosts.')
    action_group.add_option(
        '-s', '--script', nargs=2,
        help=('Path to script to copy to host then execute. 2 args are '
              'required. If the script does not take any args pass an empty '
              'string \" \"'))
    action_group.add_option(
        '--url',
        help=('Run image_to_live.sh with provided image URL. Note: Resets '
              'defaults for --lock=TRUE and --timeout=2400 and --parallel='
              '24.'))
    parser.add_option_group(action_group)

    opts, positional = parser.parse_args()

    opts.cmd = None
    opts.extra_args = None
    opts.remote_file = None

    # Without --script/--url the positional arguments are the command itself.
    if not (opts.script or opts.url):
      if positional:
        opts.cmd = positional[0]
        opts.extra_args = ' '.join(positional[1:])
      else:
        parser.error('Either script, command, or URL must be selected.')

    # --script takes two values: the local script path and its arguments.
    if opts.script:
      script_path, script_args = opts.script
      opts.script = script_path
      opts.extra_args = script_args
      opts.remote_file = os.path.join(
          DEFAULT_REMOTE_COPY_PATH, script_path.rsplit(os.path.sep, 1)[-1])
    else:
      opts.remote_file = ''

    # Updates get their own defaults, but only where the value still equals
    # the regular default; values the user has overridden are kept.
    if opts.url:
      if opts.timeout == DEFAULT_TIMEOUT:
        opts.timeout = DEFAULT_UPDATE_TIMEOUT
      if opts.parallel == DEFAULT_CONCURRENCY:
        opts.parallel = DEFAULT_UPDATE_CONCURRENCY

    # Make sure the log folder exists when per-host log files are wanted.
    if not opts.no_log_files and not os.path.exists(opts.log_path):
      os.makedirs(opts.log_path)

    return opts
391
392
def ProcessResults(results, result_type):
  """Log a summary of all hosts that share one result type.

  The message holds the host count and result description on the first line
  and the comma separated host names on the second.

  Args:
    results: Hosts with the same result type.
    result_type: String description of the result type.
  """
  summary = '%d hosts %s.\n' % (len(results), result_type)
  report = summary + ', '.join(results)
  mp_log_util.LogWithHeader(report, width=80, symbol='-')
403
404
def main():
  """Run commands in parallel on remote hosts.

  Parses the command line, creates one HostWorker per selected host, executes
  them through the multi-process work pool, then logs the hosts grouped by
  result followed by the total running time.
  """
  script_start_time = datetime.datetime.now()
  cm = CommandManager()
  if not cm.host_list:
    logging.error('No hosts found.')
    return
  logging.info('Found %d hosts.', len(cm.host_list))

  # Create work object for each host retrieved.
  workers = [HostWorker(host, cm.options) for host in cm.host_list]

  # Submit work to pool. The pool hands back the processed work items.
  mp_tp = tp.MultiProcWorkPool(max_threads=cm.options.parallel)
  workers = mp_tp.ExecuteWorkItems(
      workers, provide_logger=True,
      logger_init_callback=mp_log_util.InitializeLogging, **vars(cm.options))

  # Now that work is done, output results.
  status_strings = {'PASS': 'succeeded',
                    'SSH': 'failed connecting via SSH',
                    'LOCK': 'failed locking in Autotest',
                    'COPY': 'failed copying script',
                    'CMD': 'failed executing command',
                    'URL': 'failed updating image'}
  results = dict((key, []) for key in status_strings)

  # Divide results by result type for prettier reporting. A worker whose
  # result is not a known status (e.g. still None because it never finished a
  # step) is collected separately instead of raising KeyError.
  unknown = []
  for worker in workers:
    results.get(worker.result, unknown).append(worker.host)

  # Output final results.
  for result, host_names in results.items():
    if host_names:
      ProcessResults(host_names, status_strings[result])
  if unknown:
    ProcessResults(unknown, 'finished without reporting a result')

  if not cm.options.no_log_files:
    logging.info('Log files located in %s', cm.options.log_path)

  # Follow up with some timing info. Microseconds are zero padded so that
  # e.g. 5s + 123us is reported as 5.000123 rather than 5.123, and whole days
  # are folded into the seconds count.
  script_runtime = datetime.datetime.now() - script_start_time
  whole_seconds = script_runtime.days * 86400 + script_runtime.seconds
  logging.info('Running Time = %d.%06d seconds.',
               whole_seconds, script_runtime.microseconds)
450
451
# Only run the tool when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()