blob: e19fc733ed690ebfc11d225da70791e649a90d03 [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh03f4fc72007-11-29 20:56:14 +000022import base_classes, utils, bootloader
23
24from common.error import *
mblighdcd57a82007-07-11 23:06:47 +000025
26
class SSHHost(base_classes.RemoteHost):
    """
    A remote machine that is driven through an ssh session.

    This is never the machine autoserv itself runs on. The remote
    machine has to accept password-less logins, for example through
    public key authentication.

    When a conmux serial console is set up for the machine, console
    logging, warning monitoring and hard reset become available as
    well. Without one the ordinary ssh based commands keep working;
    console logging is skipped quietly and hardreset() reports that
    it is unsupported.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    # default number of seconds to wait for the host to come back
    # after a reboot
    DEFAULT_REBOOT_TIMEOUT = 1800
    # optional job object; when set, status lines are recorded
    # through job.record() instead of being written to stderr
    job = None
mbligh0faf91f2007-10-18 03:10:48 +000051
mblighde384372007-10-17 04:25:37 +000052 def __init__(self, hostname, user="root", port=22, initialize=True,
mbligh7c5452d2007-11-05 18:35:31 +000053 conmux_log="console.log", conmux_warnings="status.log",
mblighe6c995f2007-10-26 19:43:01 +000054 conmux_server=None, conmux_attach=None,
mblighda13d542008-01-03 16:28:34 +000055 netconsole_log=None, netconsole_port=6666, autodir=None):
mbligh7d2bde82007-08-02 16:26:10 +000056 """
57 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000058
59 Args:
60 hostname: network hostname or address of remote machine
61 user: user to log in as on the remote machine
62 port: port the ssh daemon is listening on on the remote
63 machine
mbligh9708f732007-10-18 03:18:54 +000064 """
mblighdcd57a82007-07-11 23:06:47 +000065 self.hostname= hostname
66 self.user= user
67 self.port= port
68 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000069 self.initialize = initialize
mblighda13d542008-01-03 16:28:34 +000070 self.autodir = autodir
mbligh91334902007-09-28 01:47:59 +000071
mbligh9708f732007-10-18 03:18:54 +000072 super(SSHHost, self).__init__()
73
mbligh3409ee72007-10-16 23:58:33 +000074 self.conmux_server = conmux_server
mbligh70cf0ec2008-01-18 17:57:14 +000075 if conmux_attach:
76 self.conmux_attach = conmux_attach
77 else:
78 self.conmux_attach = os.path.abspath(os.path.join(
79 self.serverdir, '..',
80 'conmux', 'conmux-attach'))
mblighfbb03542008-02-11 16:27:29 +000081 self.logger_popen = None
mblighde384372007-10-17 04:25:37 +000082 self.__start_console_log(conmux_log)
mblighfbb03542008-02-11 16:27:29 +000083 self.warning_popen = None
mblighe6c995f2007-10-26 19:43:01 +000084 self.__start_warning_log(conmux_warnings)
mbligh3409ee72007-10-16 23:58:33 +000085
mbligha0452c82007-08-08 20:24:57 +000086 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000087
mblighc0e92392007-11-05 19:10:10 +000088 self.__netconsole_param = ""
mblighfbb03542008-02-11 16:27:29 +000089 self.netlogger_popen = None
mblighc0e92392007-11-05 19:10:10 +000090 if netconsole_log:
91 self.__init_netconsole_params(netconsole_port)
92 self.__start_netconsole_log(netconsole_log, netconsole_port)
93 self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000094
mbligh7d2bde82007-08-02 16:26:10 +000095
mblighfbb03542008-02-11 16:27:29 +000096 @staticmethod
97 def __kill(popen, kill_pg):
98 return_code = popen.poll()
99 if return_code is not None:
100 return
101
102 # return_code is None -> child is still running
103 if kill_pg:
104 pgid = os.getpgid(popen.pid)
105 assert pgid != os.getpgid(0)
106 try:
107 os.killpg(pgid, signal.SIGTERM)
108 except OSError:
109 pass
110 else:
111 try:
112 os.kill(popen.pid)
113 except OSError:
114 pass
115
116
mblighdcd57a82007-07-11 23:06:47 +0000117 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +0000118 """
119 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +0000120 """
121 for dir in self.tmp_dirs:
122 try:
123 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mbligh03f4fc72007-11-29 20:56:14 +0000124 except AutoservRunError:
mblighdcd57a82007-07-11 23:06:47 +0000125 pass
mblighde384372007-10-17 04:25:37 +0000126 # kill the console logger
mblighfbb03542008-02-11 16:27:29 +0000127 if getattr(self, 'logger_popen', None):
128 self.__kill(self.logger_popen, True)
mblighde384372007-10-17 04:25:37 +0000129 # kill the netconsole logger
mblighfbb03542008-02-11 16:27:29 +0000130 if getattr(self, 'netlogger_popen', None):
mblighe6c995f2007-10-26 19:43:01 +0000131 self.__unload_netconsole_module()
mblighfbb03542008-02-11 16:27:29 +0000132 self.__kill(self.netlogger_popen, False)
mblighe6c995f2007-10-26 19:43:01 +0000133 # kill the warning logger
mblighfbb03542008-02-11 16:27:29 +0000134 if getattr(self, 'warning_popen', None):
135 self.__kill(self.warning_popen, True)
mblighde384372007-10-17 04:25:37 +0000136
137
138 def __init_netconsole_params(self, port):
139 """
140 Connect to the remote machine and determine the values to use for the
141 required netconsole parameters.
142 """
mblighde384372007-10-17 04:25:37 +0000143 # PROBLEM: on machines with multiple IPs this may not make any sense
144 # It also doesn't work with IPv6
145 remote_ip = socket.gethostbyname(self.hostname)
146 local_ip = socket.gethostbyname(socket.gethostname())
147 # Get the gateway of the remote machine
148 try:
149 traceroute = self.run('traceroute -n %s' % local_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000150 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000151 return
152 first_node = traceroute.stdout.split("\n")[0]
153 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
154 if match:
155 router_ip = match.group(1)
156 else:
157 return
158 # Look up the MAC address of the gateway
159 try:
160 self.run('ping -c 1 %s' % router_ip)
161 arp = self.run('arp -n -a %s' % router_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000162 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000163 return
164 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
165 if match:
166 gateway_mac = match.group(1)
167 else:
168 return
169 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
170 port,
171 local_ip,
172 gateway_mac)
173
174
175 def __start_netconsole_log(self, logfilename, port):
176 """
177 Log the output of netconsole to a specified file
178 """
179 if logfilename == None:
180 return
181 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighfbb03542008-02-11 16:27:29 +0000182 logfile = open(logfilename, 'a', 0)
183 self.netlogger_popen = subprocess.Popen(cmd, stdout=logfile)
mblighde384372007-10-17 04:25:37 +0000184
185
186 def __load_netconsole_module(self):
187 """
188 Make a best effort to load the netconsole module.
189
190 Note that loading the module can fail even when the remote machine is
191 working correctly if netconsole is already compiled into the kernel
192 and started.
193 """
mblighc0e92392007-11-05 19:10:10 +0000194 if not self.__netconsole_param:
195 return
mblighde384372007-10-17 04:25:37 +0000196 try:
197 self.run('modprobe netconsole %s' % self.__netconsole_param)
mbligh03f4fc72007-11-29 20:56:14 +0000198 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000199 # if it fails there isn't much we can do, just keep going
200 pass
201
202
203 def __unload_netconsole_module(self):
204 try:
205 self.run('modprobe -r netconsole')
mbligh03f4fc72007-11-29 20:56:14 +0000206 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000207 pass
mbligh3409ee72007-10-16 23:58:33 +0000208
209
mbligh5deff3d2008-01-04 21:21:28 +0000210 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
mblighd567f722007-10-30 15:37:33 +0000211 if not self.wait_down(300): # Make sure he's dead, Jim
mblighf3b78932007-11-07 16:52:47 +0000212 self.__record("ABORT", None, "reboot.verify", "shutdown failed")
mbligh03f4fc72007-11-29 20:56:14 +0000213 raise AutoservRebootError("Host did not shut down")
mbligh3409ee72007-10-16 23:58:33 +0000214 self.wait_up(timeout)
215 time.sleep(2) # this is needed for complete reliability
mblighcf3d83a2007-11-05 19:21:39 +0000216 if self.wait_up(timeout):
mbligh30270302007-11-05 20:33:52 +0000217 self.__record("GOOD", None, "reboot.verify")
mblighcf3d83a2007-11-05 19:21:39 +0000218 else:
mblighf3b78932007-11-07 16:52:47 +0000219 self.__record("ABORT", None, "reboot.verify", "bringup failed")
mbligh03f4fc72007-11-29 20:56:14 +0000220 raise AutoservRebootError("Host did not return from reboot")
mbligh3409ee72007-10-16 23:58:33 +0000221 print "Reboot complete"
222
223
mbligh80d20772007-10-29 17:10:10 +0000224 def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
mbligh3409ee72007-10-16 23:58:33 +0000225 """
226 Reach out and slap the box in the power switch
227 """
mblighf3b78932007-11-07 16:52:47 +0000228 if not self.__console_run(r"'~$hardreset'"):
229 self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
mbligh4d6feff2008-01-14 16:48:56 +0000230 raise AutoservUnsupportedError('Hard reset unavailable')
mbligh37d53c32008-01-14 16:16:00 +0000231
232 if wait:
233 self.wait_for_restart(timeout)
mbligha4d4f372008-01-22 15:49:50 +0000234 self.__record("GOOD", None, "reboot.start", "hard reset")
mbligh3409ee72007-10-16 23:58:33 +0000235
236
mblighe6c995f2007-10-26 19:43:01 +0000237 def __conmux_hostname(self):
238 if self.conmux_server:
239 return '%s/%s' % (self.conmux_server, self.hostname)
240 else:
241 return self.hostname
242
243
mbligh3409ee72007-10-16 23:58:33 +0000244 def __start_console_log(self, logfilename):
245 """
246 Log the output of the console session to a specified file
247 """
248 if logfilename == None:
249 return
250 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
251 return
mblighfbb03542008-02-11 16:27:29 +0000252 cmd = [self.conmux_attach, self.__conmux_hostname(),
253 'cat - >> %s' % logfilename]
254 dev_null = open('/dev/null', 'w')
255 setpg = lambda: os.setpgid(0, 0)
256 self.logger_popen = subprocess.Popen(cmd, stderr=dev_null,
257 preexec_fn=setpg)
mbligh3409ee72007-10-16 23:58:33 +0000258
259
mbligh94befff2007-12-10 18:03:14 +0000260 def __start_warning_log(self, logfilename):
mblighe6c995f2007-10-26 19:43:01 +0000261 """
262 Log the output of the warning monitor to a specified file
263 """
mbligh8bfa9f92007-11-24 19:29:30 +0000264 if logfilename == None or not os.path.isdir('debug'):
mblighe6c995f2007-10-26 19:43:01 +0000265 return
266 script_path = os.path.join(self.serverdir, 'warning_monitor')
mbligh7c5452d2007-11-05 18:35:31 +0000267 script_cmd = 'expect %s %s >> %s' % (script_path,
268 self.hostname,
269 logfilename)
mblighe6c995f2007-10-26 19:43:01 +0000270 if self.conmux_server:
271 to = '%s/%s'
mblighfbb03542008-02-11 16:27:29 +0000272 cmd = [self.conmux_attach, self.__conmux_hostname(),
273 script_cmd]
274 logfile = open('debug/conmux.log', 'a', 0)
275 setpg = lambda: os.setpgid(0, 0)
276 self.warning_popen = subprocess.Popen(cmd, stderr=logfile,
277 preexec_fn=setpg)
mblighe6c995f2007-10-26 19:43:01 +0000278
279
mbligh3409ee72007-10-16 23:58:33 +0000280 def __console_run(self, cmd):
281 """
282 Send a command to the conmux session
283 """
284 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
285 return False
mbligh3409ee72007-10-16 23:58:33 +0000286 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
mblighe6c995f2007-10-26 19:43:01 +0000287 self.__conmux_hostname(),
mbligh3409ee72007-10-16 23:58:33 +0000288 cmd)
mbligh0f5ad642008-01-22 16:37:40 +0000289 result = utils.system(cmd, ignore_status=True)
mbligh3409ee72007-10-16 23:58:33 +0000290 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000291
292
mbligh31a49de2007-11-05 18:41:19 +0000293 def __record(self, status_code, subdir, operation, status = ''):
294 if self.job:
295 self.job.record(status_code, subdir, operation, status)
296 else:
297 if not subdir:
298 subdir = "----"
299 msg = "%s\t%s\t%s\t%s" % (status_code, subdir, operation, status)
300 sys.stderr.write(msg + "\n")
301
302
mblighfa971602008-01-03 01:57:20 +0000303 def ssh_base_command(self, connect_timeout=30):
304 SSH_BASE_COMMAND = '/usr/bin/ssh -a -x -o ' + \
305 'BatchMode=yes -o ConnectTimeout=%d'
306 assert isinstance(connect_timeout, (int, long))
307 assert connect_timeout > 0 # can't disable the timeout
308 return SSH_BASE_COMMAND % connect_timeout
309
310
311 def ssh_command(self, connect_timeout=30):
mblighe6647d12007-10-17 00:00:01 +0000312 """Construct an ssh command with proper args for this host."""
mblighfa971602008-01-03 01:57:20 +0000313 ssh = self.ssh_base_command(connect_timeout)
314 return r'%s -l %s -p %d %s' % (ssh,
mbligh0faf91f2007-10-18 03:10:48 +0000315 self.user,
316 self.port,
317 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000318
319
mbligh07a923f2008-01-16 17:49:04 +0000320 def run(self, command, timeout=3600, ignore_status=False,
mblighfa971602008-01-03 01:57:20 +0000321 stdout_tee=None, stderr_tee=None, connect_timeout=30):
mbligh7d2bde82007-08-02 16:26:10 +0000322 """
323 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000324
325 Args:
326 command: the command line string
327 timeout: time limit in seconds before attempting to
328 kill the running process. The run() function
329 will take a few seconds longer than 'timeout'
330 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000331 ignore_status: do not raise an exception, no matter
332 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000333
334 Returns:
335 a hosts.base_classes.CmdResult object
336
337 Raises:
338 AutoservRunError: the exit code of the command
339 execution was not 0
340 """
mblighadf2aab2007-11-29 18:16:43 +0000341 stdout = stdout_tee or sys.stdout
342 stderr = stderr_tee or sys.stderr
mbligh7995cc62007-11-30 15:53:23 +0000343 print "ssh: %s" % (command,)
mblighadf2aab2007-11-29 18:16:43 +0000344 env = " ".join("=".join(pair) for pair in self.env.iteritems())
mbligh34faa282008-01-16 17:44:49 +0000345 full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
346 env, utils.sh_escape(command))
347 result = utils.run(full_cmd, timeout, True, stdout, stderr)
348 if result.exit_status == 255: # ssh's exit status for timeout
349 if re.match(r'^ssh: connect to host .* port .*: ' +
350 r'Connection timed out\r$', result.stderr):
351 raise AutoservSSHTimeout("ssh timed out",
352 result)
353 if not ignore_status and result.exit_status > 0:
354 raise AutoservRunError("command execution error",
355 result)
mblighdcd57a82007-07-11 23:06:47 +0000356 return result
mbligh7d2bde82007-08-02 16:26:10 +0000357
358
mbligh78669ff2008-01-10 16:33:07 +0000359 def run_grep(self, command, timeout=30, ignore_status=False,
360 stdout_ok_regexp=None, stdout_err_regexp=None,
361 stderr_ok_regexp=None, stderr_err_regexp=None,
362 connect_timeout=30):
363 """
364 Run a command on the remote host and look for regexp
365 in stdout or stderr to determine if the command was
366 successul or not.
mbligh6a2a2df2008-01-16 17:41:55 +0000367
mbligh78669ff2008-01-10 16:33:07 +0000368 Args:
369 command: the command line string
mbligh6a2a2df2008-01-16 17:41:55 +0000370 timeout: time limit in seconds before attempting to
mbligh78669ff2008-01-10 16:33:07 +0000371 kill the running process. The run() function
372 will take a few seconds longer than 'timeout'
373 to complete if it has to kill the process.
mbligh6a2a2df2008-01-16 17:41:55 +0000374 ignore_status: do not raise an exception, no matter
mbligh78669ff2008-01-10 16:33:07 +0000375 what the exit code of the command is.
376 stdout_ok_regexp: regexp that should be in stdout
377 if the command was successul.
378 stdout_err_regexp: regexp that should be in stdout
379 if the command failed.
380 stderr_ok_regexp: regexp that should be in stderr
381 if the command was successul.
382 stderr_err_regexp: regexp that should be in stderr
383 if the command failed.
mbligh6a2a2df2008-01-16 17:41:55 +0000384
mbligh78669ff2008-01-10 16:33:07 +0000385 Returns:
386 if the command was successul, raises an exception
387 otherwise.
mbligh6a2a2df2008-01-16 17:41:55 +0000388
mbligh78669ff2008-01-10 16:33:07 +0000389 Raises:
390 AutoservRunError:
391 - the exit code of the command execution was not 0.
mbligh6a2a2df2008-01-16 17:41:55 +0000392 - If stderr_err_regexp is found in stderr,
393 - If stdout_err_regexp is found in stdout,
mbligh78669ff2008-01-10 16:33:07 +0000394 - If stderr_ok_regexp is not found in stderr.
395 - If stdout_ok_regexp is not found in stdout,
396 """
397
398 # We ignore the status, because we will handle it at the end.
399 result = self.run(command, timeout, ignore_status=True,
mbligh6a2a2df2008-01-16 17:41:55 +0000400 connect_timeout=connect_timeout)
mbligh78669ff2008-01-10 16:33:07 +0000401
402 # Look for the patterns, in order
403 for (regexp, stream) in ((stderr_err_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000404 (stdout_err_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000405 if regexp and stream:
406 err_re = re.compile (regexp)
407 if err_re.search(stream):
mbligh6a2a2df2008-01-16 17:41:55 +0000408 raise AutoservRunError(
409 '%s failed, found error pattern: '
410 '"%s"' % (command, regexp), result)
mbligh78669ff2008-01-10 16:33:07 +0000411
412 for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000413 (stdout_ok_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000414 if regexp and stream:
415 ok_re = re.compile (regexp)
416 if ok_re.search(stream):
417 if ok_re.search(stream):
418 return
419
420 if not ignore_status and result.exit_status > 0:
mbligh6a2a2df2008-01-16 17:41:55 +0000421 raise AutoservRunError("command execution error",
422 result)
mbligh78669ff2008-01-10 16:33:07 +0000423
424
mbligh80d20772007-10-29 17:10:10 +0000425 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
426 kernel_args=None, wait=True):
mbligh7d2bde82007-08-02 16:26:10 +0000427 """
428 Reboot the remote host.
mbligh8b85dfb2007-08-28 09:50:31 +0000429
mbligha0452c82007-08-08 20:24:57 +0000430 Args:
431 timeout
mbligh8b85dfb2007-08-28 09:50:31 +0000432 """
mbligh33ae0902007-11-24 19:27:08 +0000433 self.reboot_setup()
434
mblighde384372007-10-17 04:25:37 +0000435 # forcibly include the "netconsole" kernel arg
436 if self.__netconsole_param:
437 if kernel_args is None:
438 kernel_args = self.__netconsole_param
439 else:
440 kernel_args += " " + self.__netconsole_param
441 # unload the (possibly loaded) module to avoid shutdown issues
442 self.__unload_netconsole_module()
mbligha0452c82007-08-08 20:24:57 +0000443 if label or kernel_args:
444 self.bootloader.install_boottool()
445 if label:
446 self.bootloader.set_default(label)
447 if kernel_args:
448 if not label:
449 default = int(self.bootloader.get_default())
450 label = self.bootloader.get_titles()[default]
451 self.bootloader.add_args(label, kernel_args)
mblighd742a222007-09-30 01:27:06 +0000452 print "Reboot: initiating reboot"
mbligh30270302007-11-05 20:33:52 +0000453 self.__record("GOOD", None, "reboot.start")
mblighcf3d83a2007-11-05 19:21:39 +0000454 try:
mblighf3b78932007-11-07 16:52:47 +0000455 self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
mbligh03f4fc72007-11-29 20:56:14 +0000456 except AutoservRunError:
mblighf3b78932007-11-07 16:52:47 +0000457 self.__record("ABORT", None, "reboot.start",
458 "reboot command failed")
mblighcf3d83a2007-11-05 19:21:39 +0000459 raise
mbligha0452c82007-08-08 20:24:57 +0000460 if wait:
mbligh5deff3d2008-01-04 21:21:28 +0000461 self.wait_for_restart(timeout)
mblighde384372007-10-17 04:25:37 +0000462 self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000463
mbligh7d2bde82007-08-02 16:26:10 +0000464
mblighcfc7ab32008-01-25 16:35:28 +0000465 def __copy_files(self, sources, dest):
466 """
467 Copy files from one machine to another.
468
469 This is for internal use by other methods that intend to move
470 files between machines. It expects a list of source files and
471 a destination (a filename if the source is a single file, a
472 destination otherwise). The names must already be
473 pre-processed into the appropriate rsync/scp friendly
474 format (%s@%s:%s).
475 """
476 # wait until there are only a small number of copies running
477 # before starting this one
478 MAXIMUM_SIMULTANEOUS_COPIES = 4
479 while True:
480 copy_count = 0
481 procs = utils.system_output('ps -ef')
482 for line in procs:
483 if 'rsync ' in line or 'scp ' in line:
484 copy_count += 1
485 if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
486 break
487 time.sleep(60)
488
489 try:
490 utils.run('rsync --rsh="%s" -az %s %s' % (
491 self.ssh_base_command(), ' '.join(sources), dest))
492 except Exception:
493 utils.run('scp -rpq %s "%s"' % (
494 ' '.join(sources), dest))
495
496
mblighdcd57a82007-07-11 23:06:47 +0000497 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000498 """
499 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000500
501 Directories will be copied recursively.
502 If a source component is a directory with a trailing slash,
503 the content of the directory will be copied, otherwise, the
504 directory itself and its content will be copied. This
505 behavior is similar to that of the program 'rsync'.
506
507 Args:
508 source: either
509 1) a single file or directory, as a string
510 2) a list of one or more (possibly mixed)
511 files or directories
512 dest: a file or a directory (if source contains a
513 directory or more than one element, you must
514 supply a directory dest)
515
516 Raises:
517 AutoservRunError: the scp command failed
518 """
519 if isinstance(source, types.StringTypes):
520 source= [source]
521
522 processed_source= []
523 for entry in source:
524 if entry.endswith('/'):
525 format_string= '%s@%s:"%s*"'
526 else:
527 format_string= '%s@%s:"%s"'
528 entry= format_string % (self.user, self.hostname,
529 utils.scp_remote_escape(entry))
530 processed_source.append(entry)
531
532 processed_dest= os.path.abspath(dest)
533 if os.path.isdir(dest):
534 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
535 else:
536 processed_dest= utils.sh_escape(processed_dest)
mblighcfc7ab32008-01-25 16:35:28 +0000537
538 self.__copy_files(processed_source, processed_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000539
540
mblighdcd57a82007-07-11 23:06:47 +0000541 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000542 """
543 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000544
545 Directories will be copied recursively.
546 If a source component is a directory with a trailing slash,
547 the content of the directory will be copied, otherwise, the
548 directory itself and its content will be copied. This
549 behavior is similar to that of the program 'rsync'.
550
551 Args:
552 source: either
553 1) a single file or directory, as a string
554 2) a list of one or more (possibly mixed)
555 files or directories
556 dest: a file or a directory (if source contains a
557 directory or more than one element, you must
558 supply a directory dest)
559
560 Raises:
561 AutoservRunError: the scp command failed
562 """
563 if isinstance(source, types.StringTypes):
564 source= [source]
565
566 processed_source= []
567 for entry in source:
568 if entry.endswith('/'):
569 format_string= '"%s/"*'
570 else:
571 format_string= '"%s"'
572 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
573 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000574
mbligh0faf91f2007-10-18 03:10:48 +0000575 remote_dest = '%s@%s:"%s"' % (
576 self.user, self.hostname,
577 utils.scp_remote_escape(dest))
mblighcfc7ab32008-01-25 16:35:28 +0000578
579 self.__copy_files(processed_source, remote_dest)
mblighc42141f2007-11-05 20:25:46 +0000580 self.run('find "%s" -type d | xargs -r chmod o+rx' % dest)
581 self.run('find "%s" -type f | xargs -r chmod o+r' % dest)
mbligh7d2bde82007-08-02 16:26:10 +0000582
mblighdcd57a82007-07-11 23:06:47 +0000583 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000584 """
585 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000586 for temporary file storage.
587
588 The directory and its content will be deleted automatically
589 on the destruction of the Host object that was used to obtain
590 it.
591 """
mbligha25b29e2007-08-26 13:58:04 +0000592 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000593 self.tmp_dirs.append(dir_name)
594 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000595
596
mblighdcd57a82007-07-11 23:06:47 +0000597 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000598 """
599 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000600
601 Returns:
602 True if the remote host is up, False otherwise
603 """
604 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000605 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000606 except:
mblighdcd57a82007-07-11 23:06:47 +0000607 return False
mbligheadfbb12007-11-26 23:03:12 +0000608 return True
mbligh7d2bde82007-08-02 16:26:10 +0000609
mbligh7d2bde82007-08-02 16:26:10 +0000610
mblighdcd57a82007-07-11 23:06:47 +0000611 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000612 """
613 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000614
615 In fact, it will wait until an ssh connection to the remote
616 host can be established.
617
618 Args:
619 timeout: time limit in seconds before returning even
620 if the host is not up.
621
622 Returns:
623 True if the host was found to be up, False otherwise
624 """
625 if timeout:
626 end_time= time.time() + timeout
627
628 while not timeout or time.time() < end_time:
629 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000630 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000631 except:
mblighdcd57a82007-07-11 23:06:47 +0000632 pass
633 else:
mbligheadfbb12007-11-26 23:03:12 +0000634 return True
mblighdcd57a82007-07-11 23:06:47 +0000635 time.sleep(1)
636
637 return False
mbligh7d2bde82007-08-02 16:26:10 +0000638
639
mblighdcd57a82007-07-11 23:06:47 +0000640 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000641 """
642 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000643
644 In fact, it will wait until an ssh connection to the remote
645 host fails.
646
647 Args:
648 timeout: time limit in seconds before returning even
649 if the host is not up.
650
651 Returns:
652 True if the host was found to be down, False otherwise
653 """
654 if timeout:
655 end_time= time.time() + timeout
656
657 while not timeout or time.time() < end_time:
658 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000659 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000660 except:
mblighdcd57a82007-07-11 23:06:47 +0000661 return True
mblighdcd57a82007-07-11 23:06:47 +0000662 time.sleep(1)
663
664 return False
mbligh7d2bde82007-08-02 16:26:10 +0000665
666
mblighdbe4a382007-07-26 19:41:28 +0000667 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000668 """
669 Ensure the host is up if it is not then do not proceed;
670 this prevents cacading failures of tests
671 """
mbligha0452c82007-08-08 20:24:57 +0000672 print 'Ensuring that %s is up before continuing' % self.hostname
673 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000674 print "Performing a hardreset on %s" % self.hostname
mbligh4ba0b462007-11-05 23:05:40 +0000675 try:
676 self.hardreset()
mbligh03f4fc72007-11-29 20:56:14 +0000677 except AutoservUnsupportedError:
mbligh4ba0b462007-11-05 23:05:40 +0000678 print "Hardreset is unsupported on %s" % self.hostname
mbligha9563b92007-10-25 14:45:56 +0000679 if not self.wait_up(60 * 30):
680 # 30 minutes should be more than enough
mbligh03f4fc72007-11-29 20:56:14 +0000681 raise AutoservHostError
mbligha0452c82007-08-08 20:24:57 +0000682 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000683
684
mblighdcd57a82007-07-11 23:06:47 +0000685 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000686 """
687 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000688 /proc/cpuinfo.
689
690 Returns:
691 The number of CPUs
692 """
693
mbligh5f876ad2007-10-12 23:59:53 +0000694 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000695 cpus = 0
696 for line in proc_cpuinfo.splitlines():
697 if line.startswith('processor'):
698 cpus += 1
699 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000700
701
702 def check_uptime(self):
703 """
704 Check that uptime is available and monotonically increasing.
705 """
706 if not self.ping():
mbligh4d6feff2008-01-14 16:48:56 +0000707 raise AutoservHostError('Client is not pingable')
mbligh5f876ad2007-10-12 23:59:53 +0000708 result = self.run("/bin/cat /proc/uptime", 30)
709 return result.stdout.strip().split()[0]
710
711
712 def get_arch(self):
713 """
714 Get the hardware architecture of the remote machine
715 """
716 arch = self.run('/bin/uname -m').stdout.rstrip()
717 if re.match(r'i\d86$', arch):
718 arch = 'i386'
719 return arch
720
721
722 def get_kernel_ver(self):
723 """
724 Get the kernel version of the remote machine
725 """
726 return self.run('/bin/uname -r').stdout.rstrip()
727
728
729 def get_cmdline(self):
730 """
731 Get the kernel command line of the remote machine
732 """
733 return self.run('cat /proc/cmdline').stdout.rstrip()
734
735
736 def ping(self):
737 """
738 Ping the remote system, and return whether it's available
739 """
740 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
741 rc = utils.system(fpingcmd, ignore_status = 1)
742 return (rc == 0)
mblighd2e46052007-11-05 18:31:00 +0000743
mblighf014ff42007-11-26 21:33:11 +0000744
mbligh4cfa76a2007-11-26 20:45:16 +0000745 def ssh_ping(self, timeout = 60):
mbligh4ff46b02008-02-01 17:33:37 +0000746 self.run('true', timeout = timeout, connect_timeout = timeout)
mblighda13d542008-01-03 16:28:34 +0000747
748
749 def get_autodir(self):
750 return self.autodir