blob: f6391d3a22b07d6161c44a5b80779bd32c0ac47d [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighf5427bb2008-04-09 15:55:57 +000021import types, os, sys, signal, subprocess, time, re, socket, pdb
22
23from autotest_lib.client.common_lib import error
24from autotest_lib.server import utils
25import remote, bootloader
mblighdcd57a82007-07-11 23:06:47 +000026
27
mblighf5427bb2008-04-09 15:55:57 +000028class SSHHost(remote.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000029 """
30 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000031 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000032
mblighdcd57a82007-07-11 23:06:47 +000033 It is not the machine autoserv is running on. The machine must be
34 configured for password-less login, for example through public key
35 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000036
mbligh3409ee72007-10-16 23:58:33 +000037 It includes support for controlling the machine through a serial
38 console on which you can run programs. If such a serial console is
39 set up on the machine then capabilities such as hard reset and
40 boot strap monitoring are available. If the machine does not have a
41 serial console available then ordinary SSH-based commands will
42 still be available, but attempts to use extensions such as
43 console logging or hard reset will fail silently.
44
mblighdcd57a82007-07-11 23:06:47 +000045 Implementation details:
46 This is a leaf class in an abstract class hierarchy, it must
47 implement the unimplemented methods in parent classes.
48 """
mbligh7d2bde82007-08-02 16:26:10 +000049
mbligh31a49de2007-11-05 18:41:19 +000050 DEFAULT_REBOOT_TIMEOUT = 1800
51 job = None
mbligh0faf91f2007-10-18 03:10:48 +000052
def __init__(self, hostname, user="root", port=22, initialize=True,
             conmux_log="console.log",
             conmux_server=None, conmux_attach=None,
             netconsole_log=None, netconsole_port=6666, autodir=None):
    """
    Construct a SSHHost object

    Args:
        hostname: network hostname or address of remote machine
        user: user to log in as on the remote machine
        port: port the ssh daemon is listening on on the remote
            machine
        initialize: stored as self.initialize; not otherwise
            used by this constructor
        conmux_log: name of the file the console logger is
            asked to log to; None disables console logging
        conmux_server: optional conmux server; when set it is
            prefixed to the hostname in conmux session names
        conmux_attach: path of the conmux-attach program;
            defaults to ../conmux/conmux-attach relative to
            self.serverdir
        netconsole_log: file netconsole output is appended to;
            a false value disables netconsole support
        netconsole_port: UDP port the local netconsole
            listener is started on
        autodir: value returned by get_autodir()
    """
    self.hostname= hostname
    self.user= user
    self.port= port
    self.tmp_dirs= []
    self.initialize = initialize
    self.autodir = autodir

    # the parent constructor runs before anything below reads
    # self.serverdir
    super(SSHHost, self).__init__()

    self.conmux_server = conmux_server
    if conmux_attach:
        self.conmux_attach = conmux_attach
    else:
        self.conmux_attach = os.path.abspath(os.path.join(
                                self.serverdir, '..',
                                'conmux', 'conmux-attach'))
    # both stay None unless __start_console_log() actually
    # starts a logger
    self.logger_popen = None
    self.warning_stream = None
    self.__start_console_log(conmux_log)

    self.bootloader = bootloader.Bootloader(self)

    # an empty netconsole parameter means "netconsole not in use"
    self.__netconsole_param = ""
    self.netlogger_popen = None
    if netconsole_log:
        self.__init_netconsole_params(netconsole_port)
        self.__start_netconsole_log(netconsole_log, netconsole_port)
        self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000094
mbligh7d2bde82007-08-02 16:26:10 +000095
mblighfbb03542008-02-11 16:27:29 +000096 @staticmethod
mblighf4e04152008-02-21 16:05:53 +000097 def __kill(popen):
mblighfbb03542008-02-11 16:27:29 +000098 return_code = popen.poll()
mblighf4e04152008-02-21 16:05:53 +000099 if return_code is None:
mblighfbb03542008-02-11 16:27:29 +0000100 try:
mblighf4e04152008-02-21 16:05:53 +0000101 os.kill(popen.pid, signal.SIGTERM)
mblighfbb03542008-02-11 16:27:29 +0000102 except OSError:
103 pass
104
105
mblighdcd57a82007-07-11 23:06:47 +0000106 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +0000107 """
108 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +0000109 """
110 for dir in self.tmp_dirs:
111 try:
112 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mblighf5427bb2008-04-09 15:55:57 +0000113 except error.AutoservRunError:
mblighdcd57a82007-07-11 23:06:47 +0000114 pass
mblighde384372007-10-17 04:25:37 +0000115 # kill the console logger
mblighfbb03542008-02-11 16:27:29 +0000116 if getattr(self, 'logger_popen', None):
mblighf4e04152008-02-21 16:05:53 +0000117 self.__kill(self.logger_popen)
mbligh6607d192008-04-17 15:23:15 +0000118 if self.job:
119 self.job.warning_loggers.discard(
120 self.warning_stream)
mblighf4e04152008-02-21 16:05:53 +0000121 self.warning_stream.close()
mblighde384372007-10-17 04:25:37 +0000122 # kill the netconsole logger
mblighfbb03542008-02-11 16:27:29 +0000123 if getattr(self, 'netlogger_popen', None):
mblighe6c995f2007-10-26 19:43:01 +0000124 self.__unload_netconsole_module()
mblighf4e04152008-02-21 16:05:53 +0000125 self.__kill(self.netlogger_popen)
mblighde384372007-10-17 04:25:37 +0000126
127
def __init_netconsole_params(self, port):
    """
    Connect to the remote machine and determine the values to use for the
    required netconsole parameters.

    On success self.__netconsole_param is set to a "netconsole=..."
    string built from the remote IP, the given port, the local IP and
    the MAC address of the remote machine's gateway. If any step
    fails (a remote command fails or its output cannot be parsed) the
    method returns without touching self.__netconsole_param.

    Args:
        port: port number placed in the netconsole parameter
    """
    # PROBLEM: on machines with multiple IPs this may not make any sense
    # It also doesn't work with IPv6
    remote_ip = socket.gethostbyname(self.hostname)
    local_ip = socket.gethostbyname(socket.gethostname())

    # Get the gateway of the remote machine
    try:
        traceroute = self.run('traceroute -n %s' % local_ip)
    except error.AutoservRunError:
        return
    first_node = traceroute.stdout.split("\n")[0]
    match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
    if not match:
        return
    router_ip = match.group(1)

    # Look up the MAC address of the gateway
    try:
        self.run('ping -c 1 %s' % router_ip)
        arp = self.run('arp -n -a %s' % router_ip)
    except error.AutoservRunError:
        return
    # arp implementations differ in whether they print the hex digits
    # of a MAC address in upper or lower case, so accept both
    match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+',
                      arp.stdout, re.IGNORECASE)
    if not match:
        return
    gateway_mac = match.group(1)

    self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
                                                             port,
                                                             local_ip,
                                                             gateway_mac)
163
164
165 def __start_netconsole_log(self, logfilename, port):
166 """
167 Log the output of netconsole to a specified file
168 """
169 if logfilename == None:
170 return
171 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighfbb03542008-02-11 16:27:29 +0000172 logfile = open(logfilename, 'a', 0)
173 self.netlogger_popen = subprocess.Popen(cmd, stdout=logfile)
mblighde384372007-10-17 04:25:37 +0000174
175
176 def __load_netconsole_module(self):
177 """
178 Make a best effort to load the netconsole module.
179
180 Note that loading the module can fail even when the remote machine is
181 working correctly if netconsole is already compiled into the kernel
182 and started.
183 """
mblighc0e92392007-11-05 19:10:10 +0000184 if not self.__netconsole_param:
185 return
mblighde384372007-10-17 04:25:37 +0000186 try:
187 self.run('modprobe netconsole %s' % self.__netconsole_param)
mblighf5427bb2008-04-09 15:55:57 +0000188 except error.AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000189 # if it fails there isn't much we can do, just keep going
190 pass
191
192
def __unload_netconsole_module(self):
    """
    Make a best effort to unload the netconsole module on the
    remote machine; a failing modprobe is ignored.
    """
    try:
        self.run('modprobe -r netconsole')
    except error.AutoservRunError:
        pass
mbligh3409ee72007-10-16 23:58:33 +0000198
199
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
    """
    Wait for the remote host to go down and come back up again.

    Args:
        timeout: time limit in seconds given to each wait for
            the host to come back up

    Raises:
        AutoservRebootError: the host did not go down within
            300 seconds, or did not come back up
    """
    went_down = self.wait_down(300) # Make sure he's dead, Jim
    if not went_down:
        self.__record("ABORT", None, "reboot.verify", "shutdown failed")
        raise error.AutoservRebootError(
                    "Host did not shut down")

    # wait, pause, then check again
    self.wait_up(timeout)
    time.sleep(2) # this is needed for complete reliability
    came_back = self.wait_up(timeout)
    if not came_back:
        self.__record("ABORT", None, "reboot.verify", "Host did not return from reboot")
        raise error.AutoservRebootError(
            "Host did not return from reboot")

    self.__record("GOOD", None, "reboot.verify")
    print("Reboot complete")
214
215
def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
    """
    Reach out and slap the box in the power switch

    Args:
        timeout: passed on to wait_for_restart()
        wait: if true, wait for the host to restart before
            returning

    Raises:
        AutoservUnsupportedError: the reset command could not
            be sent through the conmux console
    """
    reset_sent = self.__console_run(r"'~$hardreset'")
    if not reset_sent:
        self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
        raise error.AutoservUnsupportedError(
            'Hard reset unavailable')

    if wait:
        self.wait_for_restart(timeout)
    self.__record("GOOD", None, "reboot.start", "hard reset")
mbligh3409ee72007-10-16 23:58:33 +0000228
229
mblighe6c995f2007-10-26 19:43:01 +0000230 def __conmux_hostname(self):
231 if self.conmux_server:
232 return '%s/%s' % (self.conmux_server, self.hostname)
233 else:
234 return self.hostname
235
236
mbligh3409ee72007-10-16 23:58:33 +0000237 def __start_console_log(self, logfilename):
238 """
239 Log the output of the console session to a specified file
240 """
241 if logfilename == None:
242 return
243 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
244 return
mblighf4e04152008-02-21 16:05:53 +0000245
246 r, w = os.pipe()
247 script_path = os.path.join(self.serverdir,
248 'warning_monitor.py')
mblighfbb03542008-02-11 16:27:29 +0000249 cmd = [self.conmux_attach, self.__conmux_hostname(),
mblighf4e04152008-02-21 16:05:53 +0000250 '%s %s %s %d' % (sys.executable, script_path,
251 logfilename, w)]
mbligh0c5ce312008-02-21 16:24:11 +0000252 dev_null = open(os.devnull, 'w')
mbligh3409ee72007-10-16 23:58:33 +0000253
mblighf4e04152008-02-21 16:05:53 +0000254 self.warning_stream = os.fdopen(r, 'r', 0)
mbligh6607d192008-04-17 15:23:15 +0000255 if self.job:
256 self.job.warning_loggers.add(self.warning_stream)
mblighf4e04152008-02-21 16:05:53 +0000257 self.logger_popen = subprocess.Popen(cmd, stderr=dev_null)
258 os.close(w)
mblighe6c995f2007-10-26 19:43:01 +0000259
260
mbligh3409ee72007-10-16 23:58:33 +0000261 def __console_run(self, cmd):
262 """
263 Send a command to the conmux session
264 """
265 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
266 return False
mbligh3409ee72007-10-16 23:58:33 +0000267 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
mblighe6c995f2007-10-26 19:43:01 +0000268 self.__conmux_hostname(),
mbligh3409ee72007-10-16 23:58:33 +0000269 cmd)
mbligh0f5ad642008-01-22 16:37:40 +0000270 result = utils.system(cmd, ignore_status=True)
mbligh3409ee72007-10-16 23:58:33 +0000271 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000272
273
mbligh31a49de2007-11-05 18:41:19 +0000274 def __record(self, status_code, subdir, operation, status = ''):
275 if self.job:
276 self.job.record(status_code, subdir, operation, status)
277 else:
278 if not subdir:
279 subdir = "----"
280 msg = "%s\t%s\t%s\t%s" % (status_code, subdir, operation, status)
281 sys.stderr.write(msg + "\n")
282
283
mblighfa971602008-01-03 01:57:20 +0000284 def ssh_base_command(self, connect_timeout=30):
285 SSH_BASE_COMMAND = '/usr/bin/ssh -a -x -o ' + \
mbligh0ad21ba2008-03-14 15:06:21 +0000286 'BatchMode=yes -o ConnectTimeout=%d ' + \
287 '-o ServerAliveInterval=300'
mblighfa971602008-01-03 01:57:20 +0000288 assert isinstance(connect_timeout, (int, long))
289 assert connect_timeout > 0 # can't disable the timeout
290 return SSH_BASE_COMMAND % connect_timeout
291
292
def ssh_command(self, connect_timeout=30):
    """
    Construct an ssh command with proper args for this host.

    The result is the base ssh command followed by the login
    user, the port and the hostname of this host.
    """
    base = self.ssh_base_command(connect_timeout)
    target = (base, self.user, self.port, self.hostname)
    return r'%s -l %s -p %d %s' % target
mblighe6647d12007-10-17 00:00:01 +0000300
301
def run(self, command, timeout=3600, ignore_status=False,
        stdout_tee=None, stderr_tee=None, connect_timeout=30):
    """
    Run a command on the remote host.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.
        stdout_tee: object handed to utils.run() for the
            command's stdout; defaults to sys.stdout
        stderr_tee: object handed to utils.run() for the
            command's stderr; also defaults to sys.stdout
        connect_timeout: ssh connection timeout in seconds

    Returns:
        the result object returned by utils.run()

    Raises:
        AutoservRunError: the exit code of the command
            execution was not 0
        AutoservSSHTimeout: ssh connection has timed out
            (raised even when ignore_status is set)
    """
    out_stream = stdout_tee or sys.stdout
    err_stream = stderr_tee or sys.stdout
    print("ssh: %s" % (command,))

    # the host's environment is passed as NAME=value words placed
    # in front of the command
    env = " ".join(["=".join(item) for item in self.env.iteritems()])
    ssh = self.ssh_command(connect_timeout)
    escaped_command = utils.sh_escape(command)
    full_cmd = '%s "%s %s"' % (ssh, env, escaped_command)

    # utils.run() is told to ignore the status, it is checked here
    result = utils.run(full_cmd, timeout, True, out_stream, err_stream)

    timeout_pattern = (r'^ssh: connect to host .* port .*: '
                       r'Connection timed out\r$')
    # 255 is ssh's exit status for timeout
    if result.exit_status == 255 and re.match(timeout_pattern,
                                              result.stderr):
        raise error.AutoservSSHTimeout("ssh timed out",
                                       result)
    if result.exit_status > 0 and not ignore_status:
        raise error.AutoservRunError("command execution error",
                                     result)
    return result
mbligh7d2bde82007-08-02 16:26:10 +0000340
341
def run_short(self, command, **kwargs):
    """
    Calls the run() command with a short default timeout.

    Args:
        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.

    Returns:
        the result of run()
    """
    short_timeout = 60
    return self.run(command, timeout=short_timeout, **kwargs)
353
354
def run_grep(self, command, timeout=30, ignore_status=False,
             stdout_ok_regexp=None, stdout_err_regexp=None,
             stderr_ok_regexp=None, stderr_err_regexp=None,
             connect_timeout=30):
    """
    Run a command on the remote host and look for regexp
    in stdout or stderr to determine if the command was
    successful or not.

    The checks are made in this order:
      1. an error pattern found in its stream is a failure;
      2. an ok pattern found in its stream is a success,
         whatever the exit status is;
      3. otherwise the exit status decides, unless
         ignore_status is set.
    A pattern is only looked for when both the pattern and the
    stream it applies to are non-empty.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception because of
            the exit code of the command.
        stdout_ok_regexp: regexp that should be in stdout
            if the command was successful.
        stdout_err_regexp: regexp that should be in stdout
            if the command failed.
        stderr_ok_regexp: regexp that should be in stderr
            if the command was successful.
        stderr_err_regexp: regexp that should be in stderr
            if the command failed.
        connect_timeout: ssh connection timeout in seconds

    Returns:
        None if the command was successful, raises an exception
        otherwise.

    Raises:
        AutoservRunError:
        - If stderr_err_regexp is found in stderr,
        - If stdout_err_regexp is found in stdout,
        - If no ok pattern matched, the exit code of the
          command was not 0 and ignore_status is not set.
    """

    # We ignore the status, because we will handle it at the end.
    result = self.run(command, timeout, ignore_status=True,
                      connect_timeout=connect_timeout)

    # Look for the patterns, in order
    for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                             (stdout_err_regexp, result.stdout)):
        if regexp and stream and re.search(regexp, stream):
            raise error.AutoservRunError(
                '%s failed, found error pattern: '
                '"%s"' % (command, regexp), result)

    for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                             (stdout_ok_regexp, result.stdout)):
        if regexp and stream and re.search(regexp, stream):
            return

    if not ignore_status and result.exit_status > 0:
        raise error.AutoservRunError("command execution error",
                                     result)
mbligh78669ff2008-01-10 16:33:07 +0000419
420
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
           kernel_args=None, wait=True):
    """
    Reboot the remote host.

    Args:
        timeout: passed to wait_for_restart() when waiting for
            the host to come back
        label: bootloader entry to make the default before
            rebooting; None leaves the default unchanged
        kernel_args: kernel arguments to add to the entry that
            will be booted
        wait: if true, wait for the restart to complete and
            then call reboot_followup()

    Raises:
        AutoservRunError: the reboot command could not be run
            on the remote host
    """
    self.reboot_setup()

    # forcibly include the "netconsole" kernel arg
    if self.__netconsole_param:
        if kernel_args is None:
            kernel_args = self.__netconsole_param
        else:
            kernel_args += " " + self.__netconsole_param
        # unload the (possibly loaded) module to avoid shutdown issues
        self.__unload_netconsole_module()
    if label or kernel_args:
        self.bootloader.install_boottool()
    if label:
        self.bootloader.set_default(label)
    if kernel_args:
        if not label:
            # no label given: the arguments go to the entry that
            # is currently the bootloader default
            default = int(self.bootloader.get_default())
            label = self.bootloader.get_titles()[default]
        self.bootloader.add_args(label, kernel_args)
    print "Reboot: initiating reboot"
    self.__record("GOOD", None, "reboot.start")
    try:
        # the reboot is delayed and detached so that the ssh
        # command itself can return before the machine goes down
        self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
    except error.AutoservRunError:
        self.__record("ABORT", None, "reboot.start",
                      "reboot command failed")
        raise
    if wait:
        self.wait_for_restart(timeout)
        self.reboot_followup()
459
460
def reboot_followup(self):
    """
    Run the parent class' reboot follow-up, then make a best
    effort to load the netconsole module again.
    """
    super(SSHHost, self).reboot_followup()
    self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000464
mbligh7d2bde82007-08-02 16:26:10 +0000465
def __copy_files(self, sources, dest):
    """
    Copy files from one machine to another.

    This is for internal use by other methods that intend to move
    files between machines. It expects a list of source files and
    a destination (a filename if the source is a single file, a
    destination otherwise). The names must already be
    pre-processed into the appropriate rsync/scp friendly
    format (%s@%s:%s).

    The copy is first attempted with rsync; if that fails for any
    reason it is retried with scp.
    """
    # wait until there are only a small number of copies running
    # before starting this one
    MAXIMUM_SIMULTANEOUS_COPIES = 4
    while True:
        copy_count = 0
        procs = utils.system_output('ps -ef')
        # the process list has to be examined line by line;
        # iterating over the output itself would yield single
        # characters, which never match and defeat the throttle
        for line in procs.splitlines():
            if 'rsync ' in line or 'scp ' in line:
                copy_count += 1
        if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
            break
        time.sleep(60)

    print('__copy_files: copying %s to %s' % (sources, dest))
    try:
        utils.run('rsync --rsh="%s" -az %s %s' % (
            self.ssh_base_command(), ' '.join(sources), dest))
    except Exception:
        # deliberate best effort: fall back to scp when rsync
        # is unavailable or fails
        utils.run('scp -rpq %s "%s"' % (
            ' '.join(sources), dest))
497
498
def get_file(self, source, dest):
    """
    Copy files from the remote host to a local path.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    remote_sources = []
    for path in source:
        # a trailing slash means "the content of the directory"
        template = '%s@%s:"%s"'
        if path.endswith('/'):
            template = '%s@%s:"%s*"'
        remote_sources.append(template % (self.user, self.hostname,
                                          utils.scp_remote_escape(path)))

    local_dest = utils.sh_escape(os.path.abspath(dest))
    if os.path.isdir(dest):
        local_dest = "%s/" % (local_dest,)

    self.__copy_files(remote_sources, local_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000541
542
def send_file(self, source, dest):
    """
    Copy files from a local path to the remote host.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    After the copy, everything under dest on the remote host is
    made readable (and directories searchable) by others.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command or one of the
            permission fix-up commands failed
    """
    if isinstance(source, types.StringTypes):
        source= [source]

    processed_source= []
    for entry in source:
        if entry.endswith('/'):
            # abspath() drops the trailing slash, so it is put
            # back before the wildcard
            format_string= '"%s/"*'
        else:
            format_string= '"%s"'
        entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
        processed_source.append(entry)

    remote_dest = '%s@%s:"%s"' % (
        self.user, self.hostname,
        utils.scp_remote_escape(dest))

    self.__copy_files(processed_source, remote_dest)
    # dest ends up inside double quotes on the remote shell, so it is
    # escaped the same way as the paths removed in __del__
    escaped_dest = utils.sh_escape(dest)
    self.run('find "%s" -type d | xargs -i -r chmod o+rx "{}"' % escaped_dest)
    self.run('find "%s" -type f | xargs -i -r chmod o+r "{}"' % escaped_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000584
def get_tmp_dir(self):
    """
    Create a directory on the host suitable for temporary file
    storage and return its pathname.

    The directory is registered in self.tmp_dirs so that it and
    its content are deleted automatically on the destruction of
    the Host object that was used to obtain it.
    """
    result = self.run("mktemp -d /tmp/autoserv-XXXXXX")
    tmp_dir = result.stdout.rstrip(" \n")
    self.tmp_dirs.append(tmp_dir)
    return tmp_dir
mbligh7d2bde82007-08-02 16:26:10 +0000597
598
def is_up(self):
    """
    Check if the remote host is up.

    The host is considered up when ssh_ping() succeeds. Any
    ordinary exception from ssh_ping() means "down";
    KeyboardInterrupt and SystemExit are not swallowed.

    Returns:
        True if the remote host is up, False otherwise
    """
    try:
        self.ssh_ping()
    except Exception:
        return False
    return True
mbligh7d2bde82007-08-02 16:26:10 +0000611
mbligh7d2bde82007-08-02 16:26:10 +0000612
def wait_up(self, timeout=None):
    """
    Wait until the remote host is up or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host can be established. The host is polled about once a
    second. KeyboardInterrupt and SystemExit raised while
    polling are not swallowed, so the wait can be interrupted.

    Args:
        timeout: time limit in seconds before returning even
            if the host is not up. A false value (None, 0)
            means wait forever.

    Returns:
        True if the host was found to be up, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except Exception:
            pass
        else:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000640
641
def wait_down(self, timeout=None):
    """
    Wait until the remote host is down or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host fails. The host is polled about once a second.
    KeyboardInterrupt and SystemExit raised while polling are
    not mistaken for the host going down.

    Args:
        timeout: time limit in seconds before returning even
            if the host is still up. A false value (None, 0)
            means wait forever.

    Returns:
        True if the host was found to be down, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except Exception:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000667
668
def ensure_up(self):
    """
    Ensure the host is up; if it is not then do not proceed.
    This prevents cascading failures of tests.

    If the host does not answer within 300 seconds a hard reset
    is attempted, then the host is given 30 more minutes.

    Raises:
        AutoservHostError: the host did not come up
    """
    host = self.hostname
    print('Ensuring that %s is up before continuing' % host)
    if hasattr(self, 'hardreset') and not self.wait_up(300):
        print("Performing a hardreset on %s" % host)
        try:
            self.hardreset()
        except error.AutoservUnsupportedError:
            print("Hardreset is unsupported on %s" % host)
    # 30 minutes should be more than enough
    came_up = self.wait_up(60 * 30)
    if not came_up:
        raise error.AutoservHostError
    print('Host up, continuing')
mbligh7d2bde82007-08-02 16:26:10 +0000685
686
def get_num_cpu(self):
    """
    Get the number of CPUs in the host according to
    /proc/cpuinfo.

    Returns:
        The number of CPUs, i.e. the number of lines of
        /proc/cpuinfo that start with "processor"
    """
    # the output is only wanted in the result, not echoed, so it is
    # teed to the null device; the handle is closed once run() returns
    dev_null = open(os.devnull, 'w')
    try:
        proc_cpuinfo = self.run("cat /proc/cpuinfo",
                                stdout_tee=dev_null).stdout
    finally:
        dev_null.close()
    return sum(1 for line in proc_cpuinfo.splitlines()
               if line.startswith('processor'))
mbligh5f876ad2007-10-12 23:59:53 +0000703
704
def check_uptime(self):
    """
    Read the uptime of the remote machine.

    Returns:
        the first field of /proc/uptime, as a string

    Raises:
        AutoservHostError: the host does not answer to ping()
    """
    if not self.ping():
        raise error.AutoservHostError('Client is not pingable')
    uptime_fields = self.run("/bin/cat /proc/uptime", 30).stdout.strip().split()
    return uptime_fields[0]
713
714
def get_arch(self):
    """
    Get the hardware architecture of the remote machine

    Every iX86 variant reported by uname is returned as 'i386'.
    """
    machine = self.run('/bin/uname -m').stdout.rstrip()
    if re.match(r'i\d86$', machine):
        return 'i386'
    return machine
723
724
def get_kernel_ver(self):
    """
    Get the kernel version of the remote machine, as reported
    by "uname -r"
    """
    result = self.run('/bin/uname -r')
    return result.stdout.rstrip()
730
731
def get_cmdline(self):
    """
    Get the kernel command line of the remote machine, as found
    in /proc/cmdline
    """
    result = self.run('cat /proc/cmdline')
    return result.stdout.rstrip()
737
738
def ping(self):
    """
    Ping the remote system, and return whether it's available

    Returns:
        True if the local fping command exits with status 0
    """
    fping_cmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
    return utils.system(fping_cmd, ignore_status = 1) == 0
mblighd2e46052007-11-05 18:31:00 +0000746
mblighf014ff42007-11-26 21:33:11 +0000747
def ssh_ping(self, timeout = 60):
    """
    Check that a command can be run on the remote host over ssh
    by running "true" on it.

    Args:
        timeout: used both as the command timeout and as the
            ssh connection timeout, in seconds

    Raises:
        AutoservSSHTimeout: the ssh connection timed out
        AutoservRunError: the command failed for another reason
    """
    try:
        self.run('true', timeout = timeout, connect_timeout = timeout)
    except error.AutoservSSHTimeout:
        msg = "ssh ping timed out. timeout = %s" % timeout
        raise error.AutoservSSHTimeout(msg)
    except error.AutoservRunError, exc:
        msg = "command true failed in ssh ping"
        # exc.args[1] is the second constructor argument; run() in
        # this class passes the command result there
        raise error.AutoservRunError(msg, exc.args[1])
mblighda13d542008-01-03 16:28:34 +0000757
758
def get_autodir(self):
    """
    Return the autodir value given to the constructor (None if
    none was given).
    """
    return self.autodir