blob: 158becf7aa3e95fc26d8b316c20fae1e01999718 [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighf5427bb2008-04-09 15:55:57 +000021import types, os, sys, signal, subprocess, time, re, socket, pdb
22
23from autotest_lib.client.common_lib import error
24from autotest_lib.server import utils
25import remote, bootloader
mblighdcd57a82007-07-11 23:06:47 +000026
27
mblighbda9c9c2008-04-08 17:45:00 +000028
class SSHHost(remote.RemoteHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console (conmux) on which you can run programs. If such a serial
    console is set up on the machine then capabilities such as hard
    reset and boot strap monitoring are available. If the machine does
    not have a serial console available then ordinary SSH-based
    commands will still be available; console logging is then skipped
    silently, while hardreset() raises AutoservUnsupportedError.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    # Default number of seconds reboot(), hardreset() and
    # wait_for_restart() allow for the host to come back.
    DEFAULT_REBOOT_TIMEOUT = 1800
    # Optional job object used for status records and warning loggers;
    # while it is None, status records are written to stderr instead.
    job = None
mbligh0faf91f2007-10-18 03:10:48 +000053
mblighde384372007-10-17 04:25:37 +000054 def __init__(self, hostname, user="root", port=22, initialize=True,
mblighf4e04152008-02-21 16:05:53 +000055 conmux_log="console.log",
mblighe6c995f2007-10-26 19:43:01 +000056 conmux_server=None, conmux_attach=None,
mblighda13d542008-01-03 16:28:34 +000057 netconsole_log=None, netconsole_port=6666, autodir=None):
mbligh7d2bde82007-08-02 16:26:10 +000058 """
59 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000060
61 Args:
62 hostname: network hostname or address of remote machine
63 user: user to log in as on the remote machine
64 port: port the ssh daemon is listening on on the remote
65 machine
mbligh9708f732007-10-18 03:18:54 +000066 """
mblighdcd57a82007-07-11 23:06:47 +000067 self.hostname= hostname
68 self.user= user
69 self.port= port
70 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000071 self.initialize = initialize
mblighda13d542008-01-03 16:28:34 +000072 self.autodir = autodir
mbligh91334902007-09-28 01:47:59 +000073
mbligh9708f732007-10-18 03:18:54 +000074 super(SSHHost, self).__init__()
75
mbligh3409ee72007-10-16 23:58:33 +000076 self.conmux_server = conmux_server
mbligh70cf0ec2008-01-18 17:57:14 +000077 if conmux_attach:
78 self.conmux_attach = conmux_attach
79 else:
80 self.conmux_attach = os.path.abspath(os.path.join(
81 self.serverdir, '..',
82 'conmux', 'conmux-attach'))
mblighfbb03542008-02-11 16:27:29 +000083 self.logger_popen = None
mblighf4e04152008-02-21 16:05:53 +000084 self.warning_stream = None
mblighde384372007-10-17 04:25:37 +000085 self.__start_console_log(conmux_log)
mbligh3409ee72007-10-16 23:58:33 +000086
mbligha0452c82007-08-08 20:24:57 +000087 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000088
mblighc0e92392007-11-05 19:10:10 +000089 self.__netconsole_param = ""
mblighfbb03542008-02-11 16:27:29 +000090 self.netlogger_popen = None
mblighc0e92392007-11-05 19:10:10 +000091 if netconsole_log:
92 self.__init_netconsole_params(netconsole_port)
93 self.__start_netconsole_log(netconsole_log, netconsole_port)
94 self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000095
mbligh7d2bde82007-08-02 16:26:10 +000096
mblighfbb03542008-02-11 16:27:29 +000097 @staticmethod
mblighf4e04152008-02-21 16:05:53 +000098 def __kill(popen):
mblighfbb03542008-02-11 16:27:29 +000099 return_code = popen.poll()
mblighf4e04152008-02-21 16:05:53 +0000100 if return_code is None:
mblighfbb03542008-02-11 16:27:29 +0000101 try:
mblighf4e04152008-02-21 16:05:53 +0000102 os.kill(popen.pid, signal.SIGTERM)
mblighfbb03542008-02-11 16:27:29 +0000103 except OSError:
104 pass
105
106
mblighdcd57a82007-07-11 23:06:47 +0000107 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +0000108 """
109 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +0000110 """
111 for dir in self.tmp_dirs:
112 try:
113 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mblighf5427bb2008-04-09 15:55:57 +0000114 except error.AutoservRunError:
mblighdcd57a82007-07-11 23:06:47 +0000115 pass
mblighde384372007-10-17 04:25:37 +0000116 # kill the console logger
mblighfbb03542008-02-11 16:27:29 +0000117 if getattr(self, 'logger_popen', None):
mblighf4e04152008-02-21 16:05:53 +0000118 self.__kill(self.logger_popen)
mbligh6607d192008-04-17 15:23:15 +0000119 if self.job:
120 self.job.warning_loggers.discard(
121 self.warning_stream)
mblighf4e04152008-02-21 16:05:53 +0000122 self.warning_stream.close()
mblighde384372007-10-17 04:25:37 +0000123 # kill the netconsole logger
mblighfbb03542008-02-11 16:27:29 +0000124 if getattr(self, 'netlogger_popen', None):
mblighe6c995f2007-10-26 19:43:01 +0000125 self.__unload_netconsole_module()
mblighf4e04152008-02-21 16:05:53 +0000126 self.__kill(self.netlogger_popen)
mblighde384372007-10-17 04:25:37 +0000127
128
129 def __init_netconsole_params(self, port):
130 """
131 Connect to the remote machine and determine the values to use for the
132 required netconsole parameters.
133 """
mblighde384372007-10-17 04:25:37 +0000134 # PROBLEM: on machines with multiple IPs this may not make any sense
135 # It also doesn't work with IPv6
136 remote_ip = socket.gethostbyname(self.hostname)
137 local_ip = socket.gethostbyname(socket.gethostname())
138 # Get the gateway of the remote machine
139 try:
140 traceroute = self.run('traceroute -n %s' % local_ip)
mblighf5427bb2008-04-09 15:55:57 +0000141 except error.AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000142 return
143 first_node = traceroute.stdout.split("\n")[0]
144 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
145 if match:
146 router_ip = match.group(1)
147 else:
148 return
149 # Look up the MAC address of the gateway
150 try:
151 self.run('ping -c 1 %s' % router_ip)
152 arp = self.run('arp -n -a %s' % router_ip)
mblighf5427bb2008-04-09 15:55:57 +0000153 except error.AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000154 return
155 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
156 if match:
157 gateway_mac = match.group(1)
158 else:
159 return
160 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
161 port,
162 local_ip,
163 gateway_mac)
164
165
166 def __start_netconsole_log(self, logfilename, port):
167 """
168 Log the output of netconsole to a specified file
169 """
170 if logfilename == None:
171 return
172 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighfbb03542008-02-11 16:27:29 +0000173 logfile = open(logfilename, 'a', 0)
174 self.netlogger_popen = subprocess.Popen(cmd, stdout=logfile)
mblighde384372007-10-17 04:25:37 +0000175
176
177 def __load_netconsole_module(self):
178 """
179 Make a best effort to load the netconsole module.
180
181 Note that loading the module can fail even when the remote machine is
182 working correctly if netconsole is already compiled into the kernel
183 and started.
184 """
mblighc0e92392007-11-05 19:10:10 +0000185 if not self.__netconsole_param:
186 return
mblighde384372007-10-17 04:25:37 +0000187 try:
188 self.run('modprobe netconsole %s' % self.__netconsole_param)
mblighf5427bb2008-04-09 15:55:57 +0000189 except error.AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000190 # if it fails there isn't much we can do, just keep going
191 pass
192
193
194 def __unload_netconsole_module(self):
195 try:
196 self.run('modprobe -r netconsole')
mblighf5427bb2008-04-09 15:55:57 +0000197 except error.AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000198 pass
mbligh3409ee72007-10-16 23:58:33 +0000199
200
mbligh5deff3d2008-01-04 21:21:28 +0000201 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
mblighd567f722007-10-30 15:37:33 +0000202 if not self.wait_down(300): # Make sure he's dead, Jim
mblighf3b78932007-11-07 16:52:47 +0000203 self.__record("ABORT", None, "reboot.verify", "shutdown failed")
mblighf5427bb2008-04-09 15:55:57 +0000204 raise error.AutoservRebootError(
205 "Host did not shut down")
mbligh3409ee72007-10-16 23:58:33 +0000206 self.wait_up(timeout)
207 time.sleep(2) # this is needed for complete reliability
mblighcf3d83a2007-11-05 19:21:39 +0000208 if self.wait_up(timeout):
mbligh30270302007-11-05 20:33:52 +0000209 self.__record("GOOD", None, "reboot.verify")
mblighcf3d83a2007-11-05 19:21:39 +0000210 else:
mbligh71d24222008-03-11 21:31:56 +0000211 self.__record("ABORT", None, "reboot.verify", "Host did not return from reboot")
mblighf5427bb2008-04-09 15:55:57 +0000212 raise error.AutoservRebootError(
213 "Host did not return from reboot")
mbligh3409ee72007-10-16 23:58:33 +0000214 print "Reboot complete"
215
216
mbligh80d20772007-10-29 17:10:10 +0000217 def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
mbligh3409ee72007-10-16 23:58:33 +0000218 """
219 Reach out and slap the box in the power switch
220 """
mblighf3b78932007-11-07 16:52:47 +0000221 if not self.__console_run(r"'~$hardreset'"):
222 self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
mblighf5427bb2008-04-09 15:55:57 +0000223 raise error.AutoservUnsupportedError(
224 'Hard reset unavailable')
mbligh37d53c32008-01-14 16:16:00 +0000225
226 if wait:
227 self.wait_for_restart(timeout)
mbligha4d4f372008-01-22 15:49:50 +0000228 self.__record("GOOD", None, "reboot.start", "hard reset")
mbligh3409ee72007-10-16 23:58:33 +0000229
230
mblighe6c995f2007-10-26 19:43:01 +0000231 def __conmux_hostname(self):
232 if self.conmux_server:
233 return '%s/%s' % (self.conmux_server, self.hostname)
234 else:
235 return self.hostname
236
237
mbligh3409ee72007-10-16 23:58:33 +0000238 def __start_console_log(self, logfilename):
239 """
240 Log the output of the console session to a specified file
241 """
242 if logfilename == None:
243 return
244 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
245 return
mblighf4e04152008-02-21 16:05:53 +0000246
247 r, w = os.pipe()
248 script_path = os.path.join(self.serverdir,
249 'warning_monitor.py')
mblighfbb03542008-02-11 16:27:29 +0000250 cmd = [self.conmux_attach, self.__conmux_hostname(),
mblighf4e04152008-02-21 16:05:53 +0000251 '%s %s %s %d' % (sys.executable, script_path,
252 logfilename, w)]
mbligh0c5ce312008-02-21 16:24:11 +0000253 dev_null = open(os.devnull, 'w')
mbligh3409ee72007-10-16 23:58:33 +0000254
mblighf4e04152008-02-21 16:05:53 +0000255 self.warning_stream = os.fdopen(r, 'r', 0)
mbligh6607d192008-04-17 15:23:15 +0000256 if self.job:
257 self.job.warning_loggers.add(self.warning_stream)
mblighf4e04152008-02-21 16:05:53 +0000258 self.logger_popen = subprocess.Popen(cmd, stderr=dev_null)
259 os.close(w)
mblighe6c995f2007-10-26 19:43:01 +0000260
261
mbligh3409ee72007-10-16 23:58:33 +0000262 def __console_run(self, cmd):
263 """
264 Send a command to the conmux session
265 """
266 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
267 return False
mbligh3409ee72007-10-16 23:58:33 +0000268 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
mblighe6c995f2007-10-26 19:43:01 +0000269 self.__conmux_hostname(),
mbligh3409ee72007-10-16 23:58:33 +0000270 cmd)
mbligh0f5ad642008-01-22 16:37:40 +0000271 result = utils.system(cmd, ignore_status=True)
mbligh3409ee72007-10-16 23:58:33 +0000272 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000273
274
mbligh31a49de2007-11-05 18:41:19 +0000275 def __record(self, status_code, subdir, operation, status = ''):
276 if self.job:
277 self.job.record(status_code, subdir, operation, status)
278 else:
279 if not subdir:
280 subdir = "----"
281 msg = "%s\t%s\t%s\t%s" % (status_code, subdir, operation, status)
282 sys.stderr.write(msg + "\n")
283
284
mblighfa971602008-01-03 01:57:20 +0000285 def ssh_base_command(self, connect_timeout=30):
286 SSH_BASE_COMMAND = '/usr/bin/ssh -a -x -o ' + \
mbligh0ad21ba2008-03-14 15:06:21 +0000287 'BatchMode=yes -o ConnectTimeout=%d ' + \
288 '-o ServerAliveInterval=300'
mblighfa971602008-01-03 01:57:20 +0000289 assert isinstance(connect_timeout, (int, long))
290 assert connect_timeout > 0 # can't disable the timeout
291 return SSH_BASE_COMMAND % connect_timeout
292
293
294 def ssh_command(self, connect_timeout=30):
mblighe6647d12007-10-17 00:00:01 +0000295 """Construct an ssh command with proper args for this host."""
mblighfa971602008-01-03 01:57:20 +0000296 ssh = self.ssh_base_command(connect_timeout)
297 return r'%s -l %s -p %d %s' % (ssh,
mbligh0faf91f2007-10-18 03:10:48 +0000298 self.user,
299 self.port,
300 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000301
302
mbligh07a923f2008-01-16 17:49:04 +0000303 def run(self, command, timeout=3600, ignore_status=False,
mblighfa971602008-01-03 01:57:20 +0000304 stdout_tee=None, stderr_tee=None, connect_timeout=30):
mbligh7d2bde82007-08-02 16:26:10 +0000305 """
306 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000307
308 Args:
309 command: the command line string
310 timeout: time limit in seconds before attempting to
311 kill the running process. The run() function
312 will take a few seconds longer than 'timeout'
313 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000314 ignore_status: do not raise an exception, no matter
315 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000316
317 Returns:
318 a hosts.base_classes.CmdResult object
319
320 Raises:
321 AutoservRunError: the exit code of the command
322 execution was not 0
mblighcaa62c22008-04-07 21:51:17 +0000323 AutoservSSHTimeout: ssh connection has timed out
mblighdcd57a82007-07-11 23:06:47 +0000324 """
mblighadf2aab2007-11-29 18:16:43 +0000325 stdout = stdout_tee or sys.stdout
mbligh8d4baaa2008-03-12 14:48:24 +0000326 stderr = stderr_tee or sys.stdout
mbligh7995cc62007-11-30 15:53:23 +0000327 print "ssh: %s" % (command,)
mblighadf2aab2007-11-29 18:16:43 +0000328 env = " ".join("=".join(pair) for pair in self.env.iteritems())
mbligh34faa282008-01-16 17:44:49 +0000329 full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
330 env, utils.sh_escape(command))
331 result = utils.run(full_cmd, timeout, True, stdout, stderr)
332 if result.exit_status == 255: # ssh's exit status for timeout
333 if re.match(r'^ssh: connect to host .* port .*: ' +
334 r'Connection timed out\r$', result.stderr):
mblighf5427bb2008-04-09 15:55:57 +0000335 raise error.AutoservSSHTimeout("ssh timed out",
336 result)
mbligh34faa282008-01-16 17:44:49 +0000337 if not ignore_status and result.exit_status > 0:
mblighf5427bb2008-04-09 15:55:57 +0000338 raise error.AutoservRunError("command execution error",
339 result)
mblighdcd57a82007-07-11 23:06:47 +0000340 return result
mbligh7d2bde82007-08-02 16:26:10 +0000341
342
mblighbda9c9c2008-04-08 17:45:00 +0000343 def run_short(self, command, **kwargs):
344 """
345 Calls the run() command with a short default timeout.
346
347 Args:
348 Takes the same arguments as does run(),
349 with the exception of the timeout argument which
350 here is fixed at 60 seconds.
351 It returns the result of run.
352 """
353 return self.run(command, timeout=60, **kwargs)
354
355
mbligh78669ff2008-01-10 16:33:07 +0000356 def run_grep(self, command, timeout=30, ignore_status=False,
357 stdout_ok_regexp=None, stdout_err_regexp=None,
358 stderr_ok_regexp=None, stderr_err_regexp=None,
359 connect_timeout=30):
360 """
361 Run a command on the remote host and look for regexp
362 in stdout or stderr to determine if the command was
363 successul or not.
mbligh6a2a2df2008-01-16 17:41:55 +0000364
mbligh78669ff2008-01-10 16:33:07 +0000365 Args:
366 command: the command line string
mbligh6a2a2df2008-01-16 17:41:55 +0000367 timeout: time limit in seconds before attempting to
mbligh78669ff2008-01-10 16:33:07 +0000368 kill the running process. The run() function
369 will take a few seconds longer than 'timeout'
370 to complete if it has to kill the process.
mbligh6a2a2df2008-01-16 17:41:55 +0000371 ignore_status: do not raise an exception, no matter
mbligh78669ff2008-01-10 16:33:07 +0000372 what the exit code of the command is.
373 stdout_ok_regexp: regexp that should be in stdout
374 if the command was successul.
375 stdout_err_regexp: regexp that should be in stdout
376 if the command failed.
377 stderr_ok_regexp: regexp that should be in stderr
378 if the command was successul.
379 stderr_err_regexp: regexp that should be in stderr
380 if the command failed.
mbligh6a2a2df2008-01-16 17:41:55 +0000381
mbligh78669ff2008-01-10 16:33:07 +0000382 Returns:
383 if the command was successul, raises an exception
384 otherwise.
mbligh6a2a2df2008-01-16 17:41:55 +0000385
mbligh78669ff2008-01-10 16:33:07 +0000386 Raises:
387 AutoservRunError:
388 - the exit code of the command execution was not 0.
mbligh6a2a2df2008-01-16 17:41:55 +0000389 - If stderr_err_regexp is found in stderr,
390 - If stdout_err_regexp is found in stdout,
mbligh78669ff2008-01-10 16:33:07 +0000391 - If stderr_ok_regexp is not found in stderr.
392 - If stdout_ok_regexp is not found in stdout,
393 """
394
395 # We ignore the status, because we will handle it at the end.
396 result = self.run(command, timeout, ignore_status=True,
mbligh6a2a2df2008-01-16 17:41:55 +0000397 connect_timeout=connect_timeout)
mbligh78669ff2008-01-10 16:33:07 +0000398
399 # Look for the patterns, in order
400 for (regexp, stream) in ((stderr_err_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000401 (stdout_err_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000402 if regexp and stream:
403 err_re = re.compile (regexp)
404 if err_re.search(stream):
mblighf5427bb2008-04-09 15:55:57 +0000405 raise error.AutoservRunError(
mbligh6a2a2df2008-01-16 17:41:55 +0000406 '%s failed, found error pattern: '
407 '"%s"' % (command, regexp), result)
mbligh78669ff2008-01-10 16:33:07 +0000408
409 for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000410 (stdout_ok_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000411 if regexp and stream:
412 ok_re = re.compile (regexp)
413 if ok_re.search(stream):
414 if ok_re.search(stream):
415 return
416
417 if not ignore_status and result.exit_status > 0:
mblighf5427bb2008-04-09 15:55:57 +0000418 raise error.AutoservRunError("command execution error",
419 result)
mbligh78669ff2008-01-10 16:33:07 +0000420
421
    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
               kernel_args=None, wait=True):
        """
        Reboot the remote host.

        Args:
            timeout: seconds passed to wait_for_restart() when
                wait is true
            label: bootloader entry to make the default before
                rebooting; when omitted, kernel_args are added to
                the entry the bootloader currently reports as
                default
            kernel_args: extra kernel arguments added to the entry
                through the bootloader
            wait: when true, block in wait_for_restart() and then
                call reboot_followup()

        Raises:
            AutoservRunError: the reboot command failed (re-raised
                after an ABORT "reboot.start" record)
        """
        # NOTE(review): the block nesting below was reconstructed from
        # a listing that had lost its indentation -- confirm against
        # the repository copy.
        self.reboot_setup()

        # forcibly include the "netconsole" kernel arg
        if self.__netconsole_param:
            if kernel_args is None:
                kernel_args = self.__netconsole_param
            else:
                kernel_args += " " + self.__netconsole_param
            # unload the (possibly loaded) module to avoid shutdown issues
            self.__unload_netconsole_module()
        if label or kernel_args:
            self.bootloader.install_boottool()
        if label:
            self.bootloader.set_default(label)
        if kernel_args:
            if not label:
                # no entry requested: target the current default one
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.add_args(label, kernel_args)
        print "Reboot: initiating reboot"
        self.__record("GOOD", None, "reboot.start")
        try:
            # the reboot is delayed and backgrounded so that the ssh
            # command itself can return before the host goes down
            self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
        except error.AutoservRunError:
            self.__record("ABORT", None, "reboot.start",
                          "reboot command failed")
            raise
        if wait:
            self.wait_for_restart(timeout)
            self.reboot_followup()
460
461
462 def reboot_followup(self):
463 super(SSHHost, self).reboot_followup()
464 self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000465
mbligh7d2bde82007-08-02 16:26:10 +0000466
mblighcfc7ab32008-01-25 16:35:28 +0000467 def __copy_files(self, sources, dest):
468 """
469 Copy files from one machine to another.
470
471 This is for internal use by other methods that intend to move
472 files between machines. It expects a list of source files and
473 a destination (a filename if the source is a single file, a
474 destination otherwise). The names must already be
475 pre-processed into the appropriate rsync/scp friendly
476 format (%s@%s:%s).
477 """
478 # wait until there are only a small number of copies running
479 # before starting this one
480 MAXIMUM_SIMULTANEOUS_COPIES = 4
481 while True:
482 copy_count = 0
483 procs = utils.system_output('ps -ef')
484 for line in procs:
485 if 'rsync ' in line or 'scp ' in line:
486 copy_count += 1
487 if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
488 break
489 time.sleep(60)
490
mbligh22fdf172008-04-07 18:34:56 +0000491 print '__copy_files: copying %s to %s' % (sources, dest)
mblighcfc7ab32008-01-25 16:35:28 +0000492 try:
493 utils.run('rsync --rsh="%s" -az %s %s' % (
494 self.ssh_base_command(), ' '.join(sources), dest))
495 except Exception:
496 utils.run('scp -rpq %s "%s"' % (
497 ' '.join(sources), dest))
498
499
mblighdcd57a82007-07-11 23:06:47 +0000500 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000501 """
502 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000503
504 Directories will be copied recursively.
505 If a source component is a directory with a trailing slash,
506 the content of the directory will be copied, otherwise, the
507 directory itself and its content will be copied. This
508 behavior is similar to that of the program 'rsync'.
509
510 Args:
511 source: either
512 1) a single file or directory, as a string
513 2) a list of one or more (possibly mixed)
514 files or directories
515 dest: a file or a directory (if source contains a
516 directory or more than one element, you must
517 supply a directory dest)
518
519 Raises:
520 AutoservRunError: the scp command failed
521 """
522 if isinstance(source, types.StringTypes):
523 source= [source]
524
525 processed_source= []
526 for entry in source:
527 if entry.endswith('/'):
528 format_string= '%s@%s:"%s*"'
529 else:
530 format_string= '%s@%s:"%s"'
531 entry= format_string % (self.user, self.hostname,
532 utils.scp_remote_escape(entry))
533 processed_source.append(entry)
534
535 processed_dest= os.path.abspath(dest)
536 if os.path.isdir(dest):
537 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
538 else:
539 processed_dest= utils.sh_escape(processed_dest)
mblighcfc7ab32008-01-25 16:35:28 +0000540
541 self.__copy_files(processed_source, processed_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000542
543
mblighdcd57a82007-07-11 23:06:47 +0000544 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000545 """
546 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000547
548 Directories will be copied recursively.
549 If a source component is a directory with a trailing slash,
550 the content of the directory will be copied, otherwise, the
551 directory itself and its content will be copied. This
552 behavior is similar to that of the program 'rsync'.
553
554 Args:
555 source: either
556 1) a single file or directory, as a string
557 2) a list of one or more (possibly mixed)
558 files or directories
559 dest: a file or a directory (if source contains a
560 directory or more than one element, you must
561 supply a directory dest)
562
563 Raises:
564 AutoservRunError: the scp command failed
565 """
566 if isinstance(source, types.StringTypes):
567 source= [source]
568
569 processed_source= []
570 for entry in source:
571 if entry.endswith('/'):
572 format_string= '"%s/"*'
573 else:
574 format_string= '"%s"'
575 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
576 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000577
mbligh0faf91f2007-10-18 03:10:48 +0000578 remote_dest = '%s@%s:"%s"' % (
579 self.user, self.hostname,
580 utils.scp_remote_escape(dest))
mblighcfc7ab32008-01-25 16:35:28 +0000581
582 self.__copy_files(processed_source, remote_dest)
mbligha4eb0fa2008-04-11 15:16:50 +0000583 self.run('find "%s" -type d | xargs -i -r chmod o+rx "{}"' % dest)
584 self.run('find "%s" -type f | xargs -i -r chmod o+r "{}"' % dest)
mbligh7d2bde82007-08-02 16:26:10 +0000585
mblighdcd57a82007-07-11 23:06:47 +0000586 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000587 """
588 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000589 for temporary file storage.
590
591 The directory and its content will be deleted automatically
592 on the destruction of the Host object that was used to obtain
593 it.
594 """
mbligha25b29e2007-08-26 13:58:04 +0000595 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000596 self.tmp_dirs.append(dir_name)
597 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000598
599
mblighdcd57a82007-07-11 23:06:47 +0000600 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000601 """
602 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000603
604 Returns:
605 True if the remote host is up, False otherwise
606 """
607 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000608 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000609 except:
mblighdcd57a82007-07-11 23:06:47 +0000610 return False
mbligheadfbb12007-11-26 23:03:12 +0000611 return True
mbligh7d2bde82007-08-02 16:26:10 +0000612
mbligh7d2bde82007-08-02 16:26:10 +0000613
mblighdcd57a82007-07-11 23:06:47 +0000614 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000615 """
616 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000617
618 In fact, it will wait until an ssh connection to the remote
619 host can be established.
620
621 Args:
622 timeout: time limit in seconds before returning even
623 if the host is not up.
624
625 Returns:
626 True if the host was found to be up, False otherwise
627 """
628 if timeout:
629 end_time= time.time() + timeout
630
631 while not timeout or time.time() < end_time:
632 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000633 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000634 except:
mblighdcd57a82007-07-11 23:06:47 +0000635 pass
636 else:
mbligheadfbb12007-11-26 23:03:12 +0000637 return True
mblighdcd57a82007-07-11 23:06:47 +0000638 time.sleep(1)
639
640 return False
mbligh7d2bde82007-08-02 16:26:10 +0000641
642
mblighdcd57a82007-07-11 23:06:47 +0000643 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000644 """
645 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000646
647 In fact, it will wait until an ssh connection to the remote
648 host fails.
649
650 Args:
651 timeout: time limit in seconds before returning even
652 if the host is not up.
653
654 Returns:
655 True if the host was found to be down, False otherwise
656 """
657 if timeout:
658 end_time= time.time() + timeout
659
660 while not timeout or time.time() < end_time:
661 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000662 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000663 except:
mblighdcd57a82007-07-11 23:06:47 +0000664 return True
mblighdcd57a82007-07-11 23:06:47 +0000665 time.sleep(1)
666
667 return False
mbligh7d2bde82007-08-02 16:26:10 +0000668
669
mblighdbe4a382007-07-26 19:41:28 +0000670 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000671 """
672 Ensure the host is up if it is not then do not proceed;
673 this prevents cacading failures of tests
674 """
mbligha0452c82007-08-08 20:24:57 +0000675 print 'Ensuring that %s is up before continuing' % self.hostname
676 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000677 print "Performing a hardreset on %s" % self.hostname
mbligh4ba0b462007-11-05 23:05:40 +0000678 try:
679 self.hardreset()
mblighf5427bb2008-04-09 15:55:57 +0000680 except error.AutoservUnsupportedError:
mbligh4ba0b462007-11-05 23:05:40 +0000681 print "Hardreset is unsupported on %s" % self.hostname
mbligha9563b92007-10-25 14:45:56 +0000682 if not self.wait_up(60 * 30):
683 # 30 minutes should be more than enough
mblighf5427bb2008-04-09 15:55:57 +0000684 raise error.AutoservHostError
mbligha0452c82007-08-08 20:24:57 +0000685 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000686
687
mblighdcd57a82007-07-11 23:06:47 +0000688 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000689 """
690 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000691 /proc/cpuinfo.
692
693 Returns:
694 The number of CPUs
695 """
696
mbligh0ba35792008-04-15 19:16:11 +0000697 proc_cpuinfo = self.run("cat /proc/cpuinfo",
698 stdout_tee=open('/dev/null', 'w')).stdout
mblighdcd57a82007-07-11 23:06:47 +0000699 cpus = 0
700 for line in proc_cpuinfo.splitlines():
701 if line.startswith('processor'):
702 cpus += 1
703 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000704
705
706 def check_uptime(self):
707 """
708 Check that uptime is available and monotonically increasing.
709 """
710 if not self.ping():
mblighf5427bb2008-04-09 15:55:57 +0000711 raise error.AutoservHostError('Client is not pingable')
mbligh5f876ad2007-10-12 23:59:53 +0000712 result = self.run("/bin/cat /proc/uptime", 30)
713 return result.stdout.strip().split()[0]
714
715
716 def get_arch(self):
717 """
718 Get the hardware architecture of the remote machine
719 """
720 arch = self.run('/bin/uname -m').stdout.rstrip()
721 if re.match(r'i\d86$', arch):
722 arch = 'i386'
723 return arch
724
725
726 def get_kernel_ver(self):
727 """
728 Get the kernel version of the remote machine
729 """
730 return self.run('/bin/uname -r').stdout.rstrip()
731
732
733 def get_cmdline(self):
734 """
735 Get the kernel command line of the remote machine
736 """
737 return self.run('cat /proc/cmdline').stdout.rstrip()
738
739
740 def ping(self):
741 """
742 Ping the remote system, and return whether it's available
743 """
744 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
745 rc = utils.system(fpingcmd, ignore_status = 1)
746 return (rc == 0)
mblighd2e46052007-11-05 18:31:00 +0000747
mblighf014ff42007-11-26 21:33:11 +0000748
mbligh4cfa76a2007-11-26 20:45:16 +0000749 def ssh_ping(self, timeout = 60):
mbligh4ff46b02008-02-01 17:33:37 +0000750 self.run('true', timeout = timeout, connect_timeout = timeout)
mblighda13d542008-01-03 16:28:34 +0000751
752
753 def get_autodir(self):
754 return self.autodir