blob: 45fedb671a36ca79f3bf31c8adba83e08a7b4246 [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh03f4fc72007-11-29 20:56:14 +000022import base_classes, utils, bootloader
mbligh03f4fc72007-11-29 20:56:14 +000023from common.error import *
mblighdcd57a82007-07-11 23:06:47 +000024
25
mblighbda9c9c2008-04-08 17:45:00 +000026
mblighdcd57a82007-07-11 23:06:47 +000027class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000028 """
29 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000030 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000031
mblighdcd57a82007-07-11 23:06:47 +000032 It is not the machine autoserv is running on. The machine must be
33 configured for password-less login, for example through public key
34 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000035
mbligh3409ee72007-10-16 23:58:33 +000036 It includes support for controlling the machine through a serial
37 console on which you can run programs. If such a serial console is
38 set up on the machine then capabilities such as hard reset and
39 boot strap monitoring are available. If the machine does not have a
40 serial console available then ordinary SSH-based commands will
41 still be available, but attempts to use extensions such as
42 console logging or hard reset will fail silently.
43
mblighdcd57a82007-07-11 23:06:47 +000044 Implementation details:
45 This is a leaf class in an abstract class hierarchy, it must
46 implement the unimplemented methods in parent classes.
47 """
mbligh7d2bde82007-08-02 16:26:10 +000048
mbligh31a49de2007-11-05 18:41:19 +000049 DEFAULT_REBOOT_TIMEOUT = 1800
50 job = None
mbligh0faf91f2007-10-18 03:10:48 +000051
mblighde384372007-10-17 04:25:37 +000052 def __init__(self, hostname, user="root", port=22, initialize=True,
mblighf4e04152008-02-21 16:05:53 +000053 conmux_log="console.log",
mblighe6c995f2007-10-26 19:43:01 +000054 conmux_server=None, conmux_attach=None,
mblighda13d542008-01-03 16:28:34 +000055 netconsole_log=None, netconsole_port=6666, autodir=None):
mbligh7d2bde82007-08-02 16:26:10 +000056 """
57 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000058
59 Args:
60 hostname: network hostname or address of remote machine
61 user: user to log in as on the remote machine
62 port: port the ssh daemon is listening on on the remote
63 machine
mbligh9708f732007-10-18 03:18:54 +000064 """
mblighdcd57a82007-07-11 23:06:47 +000065 self.hostname= hostname
66 self.user= user
67 self.port= port
68 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000069 self.initialize = initialize
mblighda13d542008-01-03 16:28:34 +000070 self.autodir = autodir
mbligh91334902007-09-28 01:47:59 +000071
mbligh9708f732007-10-18 03:18:54 +000072 super(SSHHost, self).__init__()
73
mbligh3409ee72007-10-16 23:58:33 +000074 self.conmux_server = conmux_server
mbligh70cf0ec2008-01-18 17:57:14 +000075 if conmux_attach:
76 self.conmux_attach = conmux_attach
77 else:
78 self.conmux_attach = os.path.abspath(os.path.join(
79 self.serverdir, '..',
80 'conmux', 'conmux-attach'))
mblighfbb03542008-02-11 16:27:29 +000081 self.logger_popen = None
mblighf4e04152008-02-21 16:05:53 +000082 self.warning_stream = None
mblighde384372007-10-17 04:25:37 +000083 self.__start_console_log(conmux_log)
mbligh3409ee72007-10-16 23:58:33 +000084
mbligha0452c82007-08-08 20:24:57 +000085 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000086
mblighc0e92392007-11-05 19:10:10 +000087 self.__netconsole_param = ""
mblighfbb03542008-02-11 16:27:29 +000088 self.netlogger_popen = None
mblighc0e92392007-11-05 19:10:10 +000089 if netconsole_log:
90 self.__init_netconsole_params(netconsole_port)
91 self.__start_netconsole_log(netconsole_log, netconsole_port)
92 self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000093
mbligh7d2bde82007-08-02 16:26:10 +000094
mblighfbb03542008-02-11 16:27:29 +000095 @staticmethod
mblighf4e04152008-02-21 16:05:53 +000096 def __kill(popen):
mblighfbb03542008-02-11 16:27:29 +000097 return_code = popen.poll()
mblighf4e04152008-02-21 16:05:53 +000098 if return_code is None:
mblighfbb03542008-02-11 16:27:29 +000099 try:
mblighf4e04152008-02-21 16:05:53 +0000100 os.kill(popen.pid, signal.SIGTERM)
mblighfbb03542008-02-11 16:27:29 +0000101 except OSError:
102 pass
103
104
mblighdcd57a82007-07-11 23:06:47 +0000105 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +0000106 """
107 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +0000108 """
109 for dir in self.tmp_dirs:
110 try:
111 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mbligh03f4fc72007-11-29 20:56:14 +0000112 except AutoservRunError:
mblighdcd57a82007-07-11 23:06:47 +0000113 pass
mblighde384372007-10-17 04:25:37 +0000114 # kill the console logger
mblighfbb03542008-02-11 16:27:29 +0000115 if getattr(self, 'logger_popen', None):
mblighf4e04152008-02-21 16:05:53 +0000116 self.__kill(self.logger_popen)
mbligh632f8382008-02-27 16:39:41 +0000117 self.job.warning_loggers.discard(self.warning_stream)
mblighf4e04152008-02-21 16:05:53 +0000118 self.warning_stream.close()
mblighde384372007-10-17 04:25:37 +0000119 # kill the netconsole logger
mblighfbb03542008-02-11 16:27:29 +0000120 if getattr(self, 'netlogger_popen', None):
mblighe6c995f2007-10-26 19:43:01 +0000121 self.__unload_netconsole_module()
mblighf4e04152008-02-21 16:05:53 +0000122 self.__kill(self.netlogger_popen)
mblighde384372007-10-17 04:25:37 +0000123
124
125 def __init_netconsole_params(self, port):
126 """
127 Connect to the remote machine and determine the values to use for the
128 required netconsole parameters.
129 """
mblighde384372007-10-17 04:25:37 +0000130 # PROBLEM: on machines with multiple IPs this may not make any sense
131 # It also doesn't work with IPv6
132 remote_ip = socket.gethostbyname(self.hostname)
133 local_ip = socket.gethostbyname(socket.gethostname())
134 # Get the gateway of the remote machine
135 try:
136 traceroute = self.run('traceroute -n %s' % local_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000137 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000138 return
139 first_node = traceroute.stdout.split("\n")[0]
140 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
141 if match:
142 router_ip = match.group(1)
143 else:
144 return
145 # Look up the MAC address of the gateway
146 try:
147 self.run('ping -c 1 %s' % router_ip)
148 arp = self.run('arp -n -a %s' % router_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000149 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000150 return
151 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
152 if match:
153 gateway_mac = match.group(1)
154 else:
155 return
156 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
157 port,
158 local_ip,
159 gateway_mac)
160
161
162 def __start_netconsole_log(self, logfilename, port):
163 """
164 Log the output of netconsole to a specified file
165 """
166 if logfilename == None:
167 return
168 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighfbb03542008-02-11 16:27:29 +0000169 logfile = open(logfilename, 'a', 0)
170 self.netlogger_popen = subprocess.Popen(cmd, stdout=logfile)
mblighde384372007-10-17 04:25:37 +0000171
172
173 def __load_netconsole_module(self):
174 """
175 Make a best effort to load the netconsole module.
176
177 Note that loading the module can fail even when the remote machine is
178 working correctly if netconsole is already compiled into the kernel
179 and started.
180 """
mblighc0e92392007-11-05 19:10:10 +0000181 if not self.__netconsole_param:
182 return
mblighde384372007-10-17 04:25:37 +0000183 try:
184 self.run('modprobe netconsole %s' % self.__netconsole_param)
mbligh03f4fc72007-11-29 20:56:14 +0000185 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000186 # if it fails there isn't much we can do, just keep going
187 pass
188
189
190 def __unload_netconsole_module(self):
191 try:
192 self.run('modprobe -r netconsole')
mbligh03f4fc72007-11-29 20:56:14 +0000193 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000194 pass
mbligh3409ee72007-10-16 23:58:33 +0000195
196
mbligh5deff3d2008-01-04 21:21:28 +0000197 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
mblighd567f722007-10-30 15:37:33 +0000198 if not self.wait_down(300): # Make sure he's dead, Jim
mblighf3b78932007-11-07 16:52:47 +0000199 self.__record("ABORT", None, "reboot.verify", "shutdown failed")
mbligh03f4fc72007-11-29 20:56:14 +0000200 raise AutoservRebootError("Host did not shut down")
mbligh3409ee72007-10-16 23:58:33 +0000201 self.wait_up(timeout)
202 time.sleep(2) # this is needed for complete reliability
mblighcf3d83a2007-11-05 19:21:39 +0000203 if self.wait_up(timeout):
mbligh30270302007-11-05 20:33:52 +0000204 self.__record("GOOD", None, "reboot.verify")
mblighcf3d83a2007-11-05 19:21:39 +0000205 else:
mbligh71d24222008-03-11 21:31:56 +0000206 self.__record("ABORT", None, "reboot.verify", "Host did not return from reboot")
mbligh03f4fc72007-11-29 20:56:14 +0000207 raise AutoservRebootError("Host did not return from reboot")
mbligh3409ee72007-10-16 23:58:33 +0000208 print "Reboot complete"
209
210
mbligh80d20772007-10-29 17:10:10 +0000211 def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
mbligh3409ee72007-10-16 23:58:33 +0000212 """
213 Reach out and slap the box in the power switch
214 """
mblighf3b78932007-11-07 16:52:47 +0000215 if not self.__console_run(r"'~$hardreset'"):
216 self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
mbligh4d6feff2008-01-14 16:48:56 +0000217 raise AutoservUnsupportedError('Hard reset unavailable')
mbligh37d53c32008-01-14 16:16:00 +0000218
219 if wait:
220 self.wait_for_restart(timeout)
mbligha4d4f372008-01-22 15:49:50 +0000221 self.__record("GOOD", None, "reboot.start", "hard reset")
mbligh3409ee72007-10-16 23:58:33 +0000222
223
mblighe6c995f2007-10-26 19:43:01 +0000224 def __conmux_hostname(self):
225 if self.conmux_server:
226 return '%s/%s' % (self.conmux_server, self.hostname)
227 else:
228 return self.hostname
229
230
mbligh3409ee72007-10-16 23:58:33 +0000231 def __start_console_log(self, logfilename):
232 """
233 Log the output of the console session to a specified file
234 """
235 if logfilename == None:
236 return
237 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
238 return
mblighf4e04152008-02-21 16:05:53 +0000239
240 r, w = os.pipe()
241 script_path = os.path.join(self.serverdir,
242 'warning_monitor.py')
mblighfbb03542008-02-11 16:27:29 +0000243 cmd = [self.conmux_attach, self.__conmux_hostname(),
mblighf4e04152008-02-21 16:05:53 +0000244 '%s %s %s %d' % (sys.executable, script_path,
245 logfilename, w)]
mbligh0c5ce312008-02-21 16:24:11 +0000246 dev_null = open(os.devnull, 'w')
mbligh3409ee72007-10-16 23:58:33 +0000247
mblighf4e04152008-02-21 16:05:53 +0000248 self.warning_stream = os.fdopen(r, 'r', 0)
249 self.job.warning_loggers.add(self.warning_stream)
250 self.logger_popen = subprocess.Popen(cmd, stderr=dev_null)
251 os.close(w)
mblighe6c995f2007-10-26 19:43:01 +0000252
253
mbligh3409ee72007-10-16 23:58:33 +0000254 def __console_run(self, cmd):
255 """
256 Send a command to the conmux session
257 """
258 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
259 return False
mbligh3409ee72007-10-16 23:58:33 +0000260 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
mblighe6c995f2007-10-26 19:43:01 +0000261 self.__conmux_hostname(),
mbligh3409ee72007-10-16 23:58:33 +0000262 cmd)
mbligh0f5ad642008-01-22 16:37:40 +0000263 result = utils.system(cmd, ignore_status=True)
mbligh3409ee72007-10-16 23:58:33 +0000264 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000265
266
mbligh31a49de2007-11-05 18:41:19 +0000267 def __record(self, status_code, subdir, operation, status = ''):
268 if self.job:
269 self.job.record(status_code, subdir, operation, status)
270 else:
271 if not subdir:
272 subdir = "----"
273 msg = "%s\t%s\t%s\t%s" % (status_code, subdir, operation, status)
274 sys.stderr.write(msg + "\n")
275
276
mblighfa971602008-01-03 01:57:20 +0000277 def ssh_base_command(self, connect_timeout=30):
278 SSH_BASE_COMMAND = '/usr/bin/ssh -a -x -o ' + \
mbligh0ad21ba2008-03-14 15:06:21 +0000279 'BatchMode=yes -o ConnectTimeout=%d ' + \
280 '-o ServerAliveInterval=300'
mblighfa971602008-01-03 01:57:20 +0000281 assert isinstance(connect_timeout, (int, long))
282 assert connect_timeout > 0 # can't disable the timeout
283 return SSH_BASE_COMMAND % connect_timeout
284
285
286 def ssh_command(self, connect_timeout=30):
mblighe6647d12007-10-17 00:00:01 +0000287 """Construct an ssh command with proper args for this host."""
mblighfa971602008-01-03 01:57:20 +0000288 ssh = self.ssh_base_command(connect_timeout)
289 return r'%s -l %s -p %d %s' % (ssh,
mbligh0faf91f2007-10-18 03:10:48 +0000290 self.user,
291 self.port,
292 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000293
294
mbligh07a923f2008-01-16 17:49:04 +0000295 def run(self, command, timeout=3600, ignore_status=False,
mblighfa971602008-01-03 01:57:20 +0000296 stdout_tee=None, stderr_tee=None, connect_timeout=30):
mbligh7d2bde82007-08-02 16:26:10 +0000297 """
298 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000299
300 Args:
301 command: the command line string
302 timeout: time limit in seconds before attempting to
303 kill the running process. The run() function
304 will take a few seconds longer than 'timeout'
305 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000306 ignore_status: do not raise an exception, no matter
307 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000308
309 Returns:
310 a hosts.base_classes.CmdResult object
311
312 Raises:
313 AutoservRunError: the exit code of the command
314 execution was not 0
mblighcaa62c22008-04-07 21:51:17 +0000315 AutoservSSHTimeout: ssh connection has timed out
mblighdcd57a82007-07-11 23:06:47 +0000316 """
mblighadf2aab2007-11-29 18:16:43 +0000317 stdout = stdout_tee or sys.stdout
mbligh8d4baaa2008-03-12 14:48:24 +0000318 stderr = stderr_tee or sys.stdout
mbligh7995cc62007-11-30 15:53:23 +0000319 print "ssh: %s" % (command,)
mblighadf2aab2007-11-29 18:16:43 +0000320 env = " ".join("=".join(pair) for pair in self.env.iteritems())
mbligh34faa282008-01-16 17:44:49 +0000321 full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
322 env, utils.sh_escape(command))
323 result = utils.run(full_cmd, timeout, True, stdout, stderr)
324 if result.exit_status == 255: # ssh's exit status for timeout
325 if re.match(r'^ssh: connect to host .* port .*: ' +
326 r'Connection timed out\r$', result.stderr):
327 raise AutoservSSHTimeout("ssh timed out",
328 result)
329 if not ignore_status and result.exit_status > 0:
330 raise AutoservRunError("command execution error",
331 result)
mblighdcd57a82007-07-11 23:06:47 +0000332 return result
mbligh7d2bde82007-08-02 16:26:10 +0000333
334
mblighbda9c9c2008-04-08 17:45:00 +0000335 def run_short(self, command, **kwargs):
336 """
337 Calls the run() command with a short default timeout.
338
339 Args:
340 Takes the same arguments as does run(),
341 with the exception of the timeout argument which
342 here is fixed at 60 seconds.
343 It returns the result of run.
344 """
345 return self.run(command, timeout=60, **kwargs)
346
347
mbligh78669ff2008-01-10 16:33:07 +0000348 def run_grep(self, command, timeout=30, ignore_status=False,
349 stdout_ok_regexp=None, stdout_err_regexp=None,
350 stderr_ok_regexp=None, stderr_err_regexp=None,
351 connect_timeout=30):
352 """
353 Run a command on the remote host and look for regexp
354 in stdout or stderr to determine if the command was
355 successul or not.
mbligh6a2a2df2008-01-16 17:41:55 +0000356
mbligh78669ff2008-01-10 16:33:07 +0000357 Args:
358 command: the command line string
mbligh6a2a2df2008-01-16 17:41:55 +0000359 timeout: time limit in seconds before attempting to
mbligh78669ff2008-01-10 16:33:07 +0000360 kill the running process. The run() function
361 will take a few seconds longer than 'timeout'
362 to complete if it has to kill the process.
mbligh6a2a2df2008-01-16 17:41:55 +0000363 ignore_status: do not raise an exception, no matter
mbligh78669ff2008-01-10 16:33:07 +0000364 what the exit code of the command is.
365 stdout_ok_regexp: regexp that should be in stdout
366 if the command was successul.
367 stdout_err_regexp: regexp that should be in stdout
368 if the command failed.
369 stderr_ok_regexp: regexp that should be in stderr
370 if the command was successul.
371 stderr_err_regexp: regexp that should be in stderr
372 if the command failed.
mbligh6a2a2df2008-01-16 17:41:55 +0000373
mbligh78669ff2008-01-10 16:33:07 +0000374 Returns:
375 if the command was successul, raises an exception
376 otherwise.
mbligh6a2a2df2008-01-16 17:41:55 +0000377
mbligh78669ff2008-01-10 16:33:07 +0000378 Raises:
379 AutoservRunError:
380 - the exit code of the command execution was not 0.
mbligh6a2a2df2008-01-16 17:41:55 +0000381 - If stderr_err_regexp is found in stderr,
382 - If stdout_err_regexp is found in stdout,
mbligh78669ff2008-01-10 16:33:07 +0000383 - If stderr_ok_regexp is not found in stderr.
384 - If stdout_ok_regexp is not found in stdout,
385 """
386
387 # We ignore the status, because we will handle it at the end.
388 result = self.run(command, timeout, ignore_status=True,
mbligh6a2a2df2008-01-16 17:41:55 +0000389 connect_timeout=connect_timeout)
mbligh78669ff2008-01-10 16:33:07 +0000390
391 # Look for the patterns, in order
392 for (regexp, stream) in ((stderr_err_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000393 (stdout_err_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000394 if regexp and stream:
395 err_re = re.compile (regexp)
396 if err_re.search(stream):
mbligh6a2a2df2008-01-16 17:41:55 +0000397 raise AutoservRunError(
398 '%s failed, found error pattern: '
399 '"%s"' % (command, regexp), result)
mbligh78669ff2008-01-10 16:33:07 +0000400
401 for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000402 (stdout_ok_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000403 if regexp and stream:
404 ok_re = re.compile (regexp)
405 if ok_re.search(stream):
406 if ok_re.search(stream):
407 return
408
409 if not ignore_status and result.exit_status > 0:
mbligh6a2a2df2008-01-16 17:41:55 +0000410 raise AutoservRunError("command execution error",
411 result)
mbligh78669ff2008-01-10 16:33:07 +0000412
413
mbligh80d20772007-10-29 17:10:10 +0000414 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
415 kernel_args=None, wait=True):
mbligh7d2bde82007-08-02 16:26:10 +0000416 """
417 Reboot the remote host.
mbligh8b85dfb2007-08-28 09:50:31 +0000418
mbligha0452c82007-08-08 20:24:57 +0000419 Args:
420 timeout
mbligh8b85dfb2007-08-28 09:50:31 +0000421 """
mbligh33ae0902007-11-24 19:27:08 +0000422 self.reboot_setup()
423
mblighde384372007-10-17 04:25:37 +0000424 # forcibly include the "netconsole" kernel arg
425 if self.__netconsole_param:
426 if kernel_args is None:
427 kernel_args = self.__netconsole_param
428 else:
429 kernel_args += " " + self.__netconsole_param
430 # unload the (possibly loaded) module to avoid shutdown issues
431 self.__unload_netconsole_module()
mbligha0452c82007-08-08 20:24:57 +0000432 if label or kernel_args:
433 self.bootloader.install_boottool()
434 if label:
435 self.bootloader.set_default(label)
436 if kernel_args:
437 if not label:
438 default = int(self.bootloader.get_default())
439 label = self.bootloader.get_titles()[default]
440 self.bootloader.add_args(label, kernel_args)
mblighd742a222007-09-30 01:27:06 +0000441 print "Reboot: initiating reboot"
mbligh30270302007-11-05 20:33:52 +0000442 self.__record("GOOD", None, "reboot.start")
mblighcf3d83a2007-11-05 19:21:39 +0000443 try:
mblighf3b78932007-11-07 16:52:47 +0000444 self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
mbligh03f4fc72007-11-29 20:56:14 +0000445 except AutoservRunError:
mblighf3b78932007-11-07 16:52:47 +0000446 self.__record("ABORT", None, "reboot.start",
447 "reboot command failed")
mblighcf3d83a2007-11-05 19:21:39 +0000448 raise
mbligha0452c82007-08-08 20:24:57 +0000449 if wait:
mbligh5deff3d2008-01-04 21:21:28 +0000450 self.wait_for_restart(timeout)
mblighde384372007-10-17 04:25:37 +0000451 self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000452
mbligh7d2bde82007-08-02 16:26:10 +0000453
mblighcfc7ab32008-01-25 16:35:28 +0000454 def __copy_files(self, sources, dest):
455 """
456 Copy files from one machine to another.
457
458 This is for internal use by other methods that intend to move
459 files between machines. It expects a list of source files and
460 a destination (a filename if the source is a single file, a
461 destination otherwise). The names must already be
462 pre-processed into the appropriate rsync/scp friendly
463 format (%s@%s:%s).
464 """
465 # wait until there are only a small number of copies running
466 # before starting this one
467 MAXIMUM_SIMULTANEOUS_COPIES = 4
468 while True:
469 copy_count = 0
470 procs = utils.system_output('ps -ef')
471 for line in procs:
472 if 'rsync ' in line or 'scp ' in line:
473 copy_count += 1
474 if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
475 break
476 time.sleep(60)
477
mbligh22fdf172008-04-07 18:34:56 +0000478 print '__copy_files: copying %s to %s' % (sources, dest)
mblighcfc7ab32008-01-25 16:35:28 +0000479 try:
480 utils.run('rsync --rsh="%s" -az %s %s' % (
481 self.ssh_base_command(), ' '.join(sources), dest))
482 except Exception:
483 utils.run('scp -rpq %s "%s"' % (
484 ' '.join(sources), dest))
485
486
mblighdcd57a82007-07-11 23:06:47 +0000487 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000488 """
489 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000490
491 Directories will be copied recursively.
492 If a source component is a directory with a trailing slash,
493 the content of the directory will be copied, otherwise, the
494 directory itself and its content will be copied. This
495 behavior is similar to that of the program 'rsync'.
496
497 Args:
498 source: either
499 1) a single file or directory, as a string
500 2) a list of one or more (possibly mixed)
501 files or directories
502 dest: a file or a directory (if source contains a
503 directory or more than one element, you must
504 supply a directory dest)
505
506 Raises:
507 AutoservRunError: the scp command failed
508 """
509 if isinstance(source, types.StringTypes):
510 source= [source]
511
512 processed_source= []
513 for entry in source:
514 if entry.endswith('/'):
515 format_string= '%s@%s:"%s*"'
516 else:
517 format_string= '%s@%s:"%s"'
518 entry= format_string % (self.user, self.hostname,
519 utils.scp_remote_escape(entry))
520 processed_source.append(entry)
521
522 processed_dest= os.path.abspath(dest)
523 if os.path.isdir(dest):
524 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
525 else:
526 processed_dest= utils.sh_escape(processed_dest)
mblighcfc7ab32008-01-25 16:35:28 +0000527
528 self.__copy_files(processed_source, processed_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000529
530
mblighdcd57a82007-07-11 23:06:47 +0000531 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000532 """
533 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000534
535 Directories will be copied recursively.
536 If a source component is a directory with a trailing slash,
537 the content of the directory will be copied, otherwise, the
538 directory itself and its content will be copied. This
539 behavior is similar to that of the program 'rsync'.
540
541 Args:
542 source: either
543 1) a single file or directory, as a string
544 2) a list of one or more (possibly mixed)
545 files or directories
546 dest: a file or a directory (if source contains a
547 directory or more than one element, you must
548 supply a directory dest)
549
550 Raises:
551 AutoservRunError: the scp command failed
552 """
553 if isinstance(source, types.StringTypes):
554 source= [source]
555
556 processed_source= []
557 for entry in source:
558 if entry.endswith('/'):
559 format_string= '"%s/"*'
560 else:
561 format_string= '"%s"'
562 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
563 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000564
mbligh0faf91f2007-10-18 03:10:48 +0000565 remote_dest = '%s@%s:"%s"' % (
566 self.user, self.hostname,
567 utils.scp_remote_escape(dest))
mblighcfc7ab32008-01-25 16:35:28 +0000568
569 self.__copy_files(processed_source, remote_dest)
mblighc42141f2007-11-05 20:25:46 +0000570 self.run('find "%s" -type d | xargs -r chmod o+rx' % dest)
571 self.run('find "%s" -type f | xargs -r chmod o+r' % dest)
mbligh7d2bde82007-08-02 16:26:10 +0000572
mblighdcd57a82007-07-11 23:06:47 +0000573 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000574 """
575 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000576 for temporary file storage.
577
578 The directory and its content will be deleted automatically
579 on the destruction of the Host object that was used to obtain
580 it.
581 """
mbligha25b29e2007-08-26 13:58:04 +0000582 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000583 self.tmp_dirs.append(dir_name)
584 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000585
586
mblighdcd57a82007-07-11 23:06:47 +0000587 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000588 """
589 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000590
591 Returns:
592 True if the remote host is up, False otherwise
593 """
594 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000595 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000596 except:
mblighdcd57a82007-07-11 23:06:47 +0000597 return False
mbligheadfbb12007-11-26 23:03:12 +0000598 return True
mbligh7d2bde82007-08-02 16:26:10 +0000599
mbligh7d2bde82007-08-02 16:26:10 +0000600
mblighdcd57a82007-07-11 23:06:47 +0000601 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000602 """
603 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000604
605 In fact, it will wait until an ssh connection to the remote
606 host can be established.
607
608 Args:
609 timeout: time limit in seconds before returning even
610 if the host is not up.
611
612 Returns:
613 True if the host was found to be up, False otherwise
614 """
615 if timeout:
616 end_time= time.time() + timeout
617
618 while not timeout or time.time() < end_time:
619 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000620 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000621 except:
mblighdcd57a82007-07-11 23:06:47 +0000622 pass
623 else:
mbligheadfbb12007-11-26 23:03:12 +0000624 return True
mblighdcd57a82007-07-11 23:06:47 +0000625 time.sleep(1)
626
627 return False
mbligh7d2bde82007-08-02 16:26:10 +0000628
629
mblighdcd57a82007-07-11 23:06:47 +0000630 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000631 """
632 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000633
634 In fact, it will wait until an ssh connection to the remote
635 host fails.
636
637 Args:
638 timeout: time limit in seconds before returning even
639 if the host is not up.
640
641 Returns:
642 True if the host was found to be down, False otherwise
643 """
644 if timeout:
645 end_time= time.time() + timeout
646
647 while not timeout or time.time() < end_time:
648 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000649 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000650 except:
mblighdcd57a82007-07-11 23:06:47 +0000651 return True
mblighdcd57a82007-07-11 23:06:47 +0000652 time.sleep(1)
653
654 return False
mbligh7d2bde82007-08-02 16:26:10 +0000655
656
mblighdbe4a382007-07-26 19:41:28 +0000657 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000658 """
659 Ensure the host is up if it is not then do not proceed;
660 this prevents cacading failures of tests
661 """
mbligha0452c82007-08-08 20:24:57 +0000662 print 'Ensuring that %s is up before continuing' % self.hostname
663 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000664 print "Performing a hardreset on %s" % self.hostname
mbligh4ba0b462007-11-05 23:05:40 +0000665 try:
666 self.hardreset()
mbligh03f4fc72007-11-29 20:56:14 +0000667 except AutoservUnsupportedError:
mbligh4ba0b462007-11-05 23:05:40 +0000668 print "Hardreset is unsupported on %s" % self.hostname
mbligha9563b92007-10-25 14:45:56 +0000669 if not self.wait_up(60 * 30):
670 # 30 minutes should be more than enough
mbligh03f4fc72007-11-29 20:56:14 +0000671 raise AutoservHostError
mbligha0452c82007-08-08 20:24:57 +0000672 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000673
674
mblighdcd57a82007-07-11 23:06:47 +0000675 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000676 """
677 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000678 /proc/cpuinfo.
679
680 Returns:
681 The number of CPUs
682 """
683
mbligh5f876ad2007-10-12 23:59:53 +0000684 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000685 cpus = 0
686 for line in proc_cpuinfo.splitlines():
687 if line.startswith('processor'):
688 cpus += 1
689 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000690
691
692 def check_uptime(self):
693 """
694 Check that uptime is available and monotonically increasing.
695 """
696 if not self.ping():
mbligh4d6feff2008-01-14 16:48:56 +0000697 raise AutoservHostError('Client is not pingable')
mbligh5f876ad2007-10-12 23:59:53 +0000698 result = self.run("/bin/cat /proc/uptime", 30)
699 return result.stdout.strip().split()[0]
700
701
702 def get_arch(self):
703 """
704 Get the hardware architecture of the remote machine
705 """
706 arch = self.run('/bin/uname -m').stdout.rstrip()
707 if re.match(r'i\d86$', arch):
708 arch = 'i386'
709 return arch
710
711
712 def get_kernel_ver(self):
713 """
714 Get the kernel version of the remote machine
715 """
716 return self.run('/bin/uname -r').stdout.rstrip()
717
718
719 def get_cmdline(self):
720 """
721 Get the kernel command line of the remote machine
722 """
723 return self.run('cat /proc/cmdline').stdout.rstrip()
724
725
726 def ping(self):
727 """
728 Ping the remote system, and return whether it's available
729 """
730 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
731 rc = utils.system(fpingcmd, ignore_status = 1)
732 return (rc == 0)
mblighd2e46052007-11-05 18:31:00 +0000733
mblighf014ff42007-11-26 21:33:11 +0000734
mbligh4cfa76a2007-11-26 20:45:16 +0000735 def ssh_ping(self, timeout = 60):
mbligh4ff46b02008-02-01 17:33:37 +0000736 self.run('true', timeout = timeout, connect_timeout = timeout)
mblighda13d542008-01-03 16:28:34 +0000737
738
739 def get_autodir(self):
740 return self.autodir