blob: cffdefd94ef4e2147f651dc4a8f4adb003ba84c6 [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh03f4fc72007-11-29 20:56:14 +000022import base_classes, utils, bootloader
23
24from common.error import *
mblighdcd57a82007-07-11 23:06:47 +000025
26
27class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000028 """
29 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000030 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000031
mblighdcd57a82007-07-11 23:06:47 +000032 It is not the machine autoserv is running on. The machine must be
33 configured for password-less login, for example through public key
34 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000035
mbligh3409ee72007-10-16 23:58:33 +000036 It includes support for controlling the machine through a serial
37 console on which you can run programs. If such a serial console is
38 set up on the machine then capabilities such as hard reset and
39 boot strap monitoring are available. If the machine does not have a
40 serial console available then ordinary SSH-based commands will
41 still be available, but attempts to use extensions such as
42 console logging or hard reset will fail silently.
43
mblighdcd57a82007-07-11 23:06:47 +000044 Implementation details:
45 This is a leaf class in an abstract class hierarchy, it must
46 implement the unimplemented methods in parent classes.
47 """
mbligh7d2bde82007-08-02 16:26:10 +000048
mbligh31a49de2007-11-05 18:41:19 +000049 DEFAULT_REBOOT_TIMEOUT = 1800
50 job = None
mbligh0faf91f2007-10-18 03:10:48 +000051
mblighde384372007-10-17 04:25:37 +000052 def __init__(self, hostname, user="root", port=22, initialize=True,
mbligh7c5452d2007-11-05 18:35:31 +000053 conmux_log="console.log", conmux_warnings="status.log",
mblighe6c995f2007-10-26 19:43:01 +000054 conmux_server=None, conmux_attach=None,
mblighda13d542008-01-03 16:28:34 +000055 netconsole_log=None, netconsole_port=6666, autodir=None):
mbligh7d2bde82007-08-02 16:26:10 +000056 """
57 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000058
59 Args:
60 hostname: network hostname or address of remote machine
61 user: user to log in as on the remote machine
62 port: port the ssh daemon is listening on on the remote
63 machine
mbligh9708f732007-10-18 03:18:54 +000064 """
mblighdcd57a82007-07-11 23:06:47 +000065 self.hostname= hostname
66 self.user= user
67 self.port= port
68 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000069 self.initialize = initialize
mblighda13d542008-01-03 16:28:34 +000070 self.autodir = autodir
mbligh91334902007-09-28 01:47:59 +000071
mbligh9708f732007-10-18 03:18:54 +000072 super(SSHHost, self).__init__()
73
mbligh3409ee72007-10-16 23:58:33 +000074 self.conmux_server = conmux_server
75 self.conmux_attach = self.__find_console_attach(conmux_attach)
76 self.logger_pid = None
mblighde384372007-10-17 04:25:37 +000077 self.__start_console_log(conmux_log)
mblighe6c995f2007-10-26 19:43:01 +000078 self.warning_pid = None
79 self.__start_warning_log(conmux_warnings)
mbligh3409ee72007-10-16 23:58:33 +000080
mbligha0452c82007-08-08 20:24:57 +000081 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000082
mblighc0e92392007-11-05 19:10:10 +000083 self.__netconsole_param = ""
mblighde384372007-10-17 04:25:37 +000084 self.netlogger_pid = None
mblighc0e92392007-11-05 19:10:10 +000085 if netconsole_log:
86 self.__init_netconsole_params(netconsole_port)
87 self.__start_netconsole_log(netconsole_log, netconsole_port)
88 self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000089
mbligh7d2bde82007-08-02 16:26:10 +000090
mblighdcd57a82007-07-11 23:06:47 +000091 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +000092 """
93 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000094 """
95 for dir in self.tmp_dirs:
96 try:
97 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mbligh03f4fc72007-11-29 20:56:14 +000098 except AutoservRunError:
mblighdcd57a82007-07-11 23:06:47 +000099 pass
mblighde384372007-10-17 04:25:37 +0000100 # kill the console logger
mbligh7364ae42007-10-18 03:20:34 +0000101 if getattr(self, 'logger_pid', None):
mbligh3409ee72007-10-16 23:58:33 +0000102 try:
103 pgid = os.getpgid(self.logger_pid)
104 os.killpg(pgid, signal.SIGTERM)
105 except OSError:
106 pass
mblighde384372007-10-17 04:25:37 +0000107 # kill the netconsole logger
mbligh7364ae42007-10-18 03:20:34 +0000108 if getattr(self, 'netlogger_pid', None):
mblighe6c995f2007-10-26 19:43:01 +0000109 self.__unload_netconsole_module()
mblighde384372007-10-17 04:25:37 +0000110 try:
111 os.kill(self.netlogger_pid, signal.SIGTERM)
112 except OSError:
113 pass
mblighe6c995f2007-10-26 19:43:01 +0000114 # kill the warning logger
115 if getattr(self, 'warning_pid', None):
116 try:
117 pgid = os.getpgid(self.warning_pid)
118 os.killpg(pgid, signal.SIGTERM)
119 except OSError:
120 pass
mblighde384372007-10-17 04:25:37 +0000121
122
123 def __init_netconsole_params(self, port):
124 """
125 Connect to the remote machine and determine the values to use for the
126 required netconsole parameters.
127 """
mblighde384372007-10-17 04:25:37 +0000128 # PROBLEM: on machines with multiple IPs this may not make any sense
129 # It also doesn't work with IPv6
130 remote_ip = socket.gethostbyname(self.hostname)
131 local_ip = socket.gethostbyname(socket.gethostname())
132 # Get the gateway of the remote machine
133 try:
134 traceroute = self.run('traceroute -n %s' % local_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000135 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000136 return
137 first_node = traceroute.stdout.split("\n")[0]
138 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
139 if match:
140 router_ip = match.group(1)
141 else:
142 return
143 # Look up the MAC address of the gateway
144 try:
145 self.run('ping -c 1 %s' % router_ip)
146 arp = self.run('arp -n -a %s' % router_ip)
mbligh03f4fc72007-11-29 20:56:14 +0000147 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000148 return
149 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
150 if match:
151 gateway_mac = match.group(1)
152 else:
153 return
154 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
155 port,
156 local_ip,
157 gateway_mac)
158
159
160 def __start_netconsole_log(self, logfilename, port):
161 """
162 Log the output of netconsole to a specified file
163 """
164 if logfilename == None:
165 return
166 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighd2fc50f2007-10-23 22:38:00 +0000167 logger = subprocess.Popen(cmd, stdout=open(logfilename, "a", 0))
mblighde384372007-10-17 04:25:37 +0000168 self.netlogger_pid = logger.pid
169
170
171 def __load_netconsole_module(self):
172 """
173 Make a best effort to load the netconsole module.
174
175 Note that loading the module can fail even when the remote machine is
176 working correctly if netconsole is already compiled into the kernel
177 and started.
178 """
mblighc0e92392007-11-05 19:10:10 +0000179 if not self.__netconsole_param:
180 return
mblighde384372007-10-17 04:25:37 +0000181 try:
182 self.run('modprobe netconsole %s' % self.__netconsole_param)
mbligh03f4fc72007-11-29 20:56:14 +0000183 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000184 # if it fails there isn't much we can do, just keep going
185 pass
186
187
188 def __unload_netconsole_module(self):
189 try:
190 self.run('modprobe -r netconsole')
mbligh03f4fc72007-11-29 20:56:14 +0000191 except AutoservRunError:
mblighde384372007-10-17 04:25:37 +0000192 pass
mbligh3409ee72007-10-16 23:58:33 +0000193
194
mbligh5deff3d2008-01-04 21:21:28 +0000195 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
mblighd567f722007-10-30 15:37:33 +0000196 if not self.wait_down(300): # Make sure he's dead, Jim
mblighf3b78932007-11-07 16:52:47 +0000197 self.__record("ABORT", None, "reboot.verify", "shutdown failed")
mbligh03f4fc72007-11-29 20:56:14 +0000198 raise AutoservRebootError("Host did not shut down")
mbligh3409ee72007-10-16 23:58:33 +0000199 self.wait_up(timeout)
200 time.sleep(2) # this is needed for complete reliability
mblighcf3d83a2007-11-05 19:21:39 +0000201 if self.wait_up(timeout):
mbligh30270302007-11-05 20:33:52 +0000202 self.__record("GOOD", None, "reboot.verify")
mblighcf3d83a2007-11-05 19:21:39 +0000203 else:
mblighf3b78932007-11-07 16:52:47 +0000204 self.__record("ABORT", None, "reboot.verify", "bringup failed")
mbligh03f4fc72007-11-29 20:56:14 +0000205 raise AutoservRebootError("Host did not return from reboot")
mbligh3409ee72007-10-16 23:58:33 +0000206 print "Reboot complete"
207
208
mbligh80d20772007-10-29 17:10:10 +0000209 def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
mbligh3409ee72007-10-16 23:58:33 +0000210 """
211 Reach out and slap the box in the power switch
212 """
mblighf3b78932007-11-07 16:52:47 +0000213 self.__record("GOOD", None, "reboot.start", "hard reset")
214 if not self.__console_run(r"'~$hardreset'"):
215 self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
mbligh4d6feff2008-01-14 16:48:56 +0000216 raise AutoservUnsupportedError('Hard reset unavailable')
mbligh37d53c32008-01-14 16:16:00 +0000217
218 if wait:
219 self.wait_for_restart(timeout)
mbligh3409ee72007-10-16 23:58:33 +0000220
221
mblighe6c995f2007-10-26 19:43:01 +0000222 def __conmux_hostname(self):
223 if self.conmux_server:
224 return '%s/%s' % (self.conmux_server, self.hostname)
225 else:
226 return self.hostname
227
228
mbligh3409ee72007-10-16 23:58:33 +0000229 def __start_console_log(self, logfilename):
230 """
231 Log the output of the console session to a specified file
232 """
233 if logfilename == None:
234 return
235 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
236 return
mblighe6c995f2007-10-26 19:43:01 +0000237 cmd = [self.conmux_attach, self.__conmux_hostname(), 'cat - >> %s' % logfilename]
mbligh3409ee72007-10-16 23:58:33 +0000238 logger = subprocess.Popen(cmd,
239 stderr=open('/dev/null', 'w'),
240 preexec_fn=lambda: os.setpgid(0, 0))
241 self.logger_pid = logger.pid
242
243
mbligh94befff2007-12-10 18:03:14 +0000244 def __start_warning_log(self, logfilename):
mblighe6c995f2007-10-26 19:43:01 +0000245 """
246 Log the output of the warning monitor to a specified file
247 """
mbligh8bfa9f92007-11-24 19:29:30 +0000248 if logfilename == None or not os.path.isdir('debug'):
mblighe6c995f2007-10-26 19:43:01 +0000249 return
250 script_path = os.path.join(self.serverdir, 'warning_monitor')
mbligh7c5452d2007-11-05 18:35:31 +0000251 script_cmd = 'expect %s %s >> %s' % (script_path,
252 self.hostname,
253 logfilename)
mblighe6c995f2007-10-26 19:43:01 +0000254 if self.conmux_server:
255 to = '%s/%s'
256 cmd = [self.conmux_attach, self.__conmux_hostname(), script_cmd]
257 logger = subprocess.Popen(cmd,
mbligh7c5452d2007-11-05 18:35:31 +0000258 stderr=open('debug/conmux.log', 'a', 0),
mblighe6c995f2007-10-26 19:43:01 +0000259 preexec_fn=lambda: os.setpgid(0, 0))
260 self.warning_pid = logger.pid
261
262
mbligh3409ee72007-10-16 23:58:33 +0000263 def __find_console_attach(self, conmux_attach):
264 if conmux_attach:
265 return conmux_attach
266 try:
267 res = utils.run('which conmux-attach')
268 if res.exit_status == 0:
269 return res.stdout.strip()
mbligh03f4fc72007-11-29 20:56:14 +0000270 except AutoservRunError, e:
mbligh3409ee72007-10-16 23:58:33 +0000271 pass
mbligh9708f732007-10-18 03:18:54 +0000272 autotest_conmux = os.path.join(self.serverdir, '..',
mbligh3409ee72007-10-16 23:58:33 +0000273 'conmux', 'conmux-attach')
mbligh9708f732007-10-18 03:18:54 +0000274 autotest_conmux_alt = os.path.join(self.serverdir,
mbligh3409ee72007-10-16 23:58:33 +0000275 '..', 'autotest',
276 'conmux', 'conmux-attach')
277 locations = [autotest_conmux,
278 autotest_conmux_alt,
279 '/usr/local/conmux/bin/conmux-attach',
280 '/usr/bin/conmux-attach']
281 for l in locations:
282 if os.path.exists(l):
283 return l
284
285 print "WARNING: conmux-attach not found on autoserv server"
286 return None
287
288
289 def __console_run(self, cmd):
290 """
291 Send a command to the conmux session
292 """
293 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
294 return False
mbligh3409ee72007-10-16 23:58:33 +0000295 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
mblighe6c995f2007-10-26 19:43:01 +0000296 self.__conmux_hostname(),
mbligh3409ee72007-10-16 23:58:33 +0000297 cmd)
298 result = os.system(cmd)
299 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000300
301
mbligh31a49de2007-11-05 18:41:19 +0000302 def __record(self, status_code, subdir, operation, status = ''):
303 if self.job:
304 self.job.record(status_code, subdir, operation, status)
305 else:
306 if not subdir:
307 subdir = "----"
308 msg = "%s\t%s\t%s\t%s" % (status_code, subdir, operation, status)
309 sys.stderr.write(msg + "\n")
310
311
mblighfa971602008-01-03 01:57:20 +0000312 def ssh_base_command(self, connect_timeout=30):
313 SSH_BASE_COMMAND = '/usr/bin/ssh -a -x -o ' + \
314 'BatchMode=yes -o ConnectTimeout=%d'
315 assert isinstance(connect_timeout, (int, long))
316 assert connect_timeout > 0 # can't disable the timeout
317 return SSH_BASE_COMMAND % connect_timeout
318
319
320 def ssh_command(self, connect_timeout=30):
mblighe6647d12007-10-17 00:00:01 +0000321 """Construct an ssh command with proper args for this host."""
mblighfa971602008-01-03 01:57:20 +0000322 ssh = self.ssh_base_command(connect_timeout)
323 return r'%s -l %s -p %d %s' % (ssh,
mbligh0faf91f2007-10-18 03:10:48 +0000324 self.user,
325 self.port,
326 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000327
328
mbligh07a923f2008-01-16 17:49:04 +0000329 def run(self, command, timeout=3600, ignore_status=False,
mblighfa971602008-01-03 01:57:20 +0000330 stdout_tee=None, stderr_tee=None, connect_timeout=30):
mbligh7d2bde82007-08-02 16:26:10 +0000331 """
332 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000333
334 Args:
335 command: the command line string
336 timeout: time limit in seconds before attempting to
337 kill the running process. The run() function
338 will take a few seconds longer than 'timeout'
339 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000340 ignore_status: do not raise an exception, no matter
341 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000342
343 Returns:
344 a hosts.base_classes.CmdResult object
345
346 Raises:
347 AutoservRunError: the exit code of the command
348 execution was not 0
349 """
mblighadf2aab2007-11-29 18:16:43 +0000350 stdout = stdout_tee or sys.stdout
351 stderr = stderr_tee or sys.stderr
mbligh7995cc62007-11-30 15:53:23 +0000352 print "ssh: %s" % (command,)
mblighadf2aab2007-11-29 18:16:43 +0000353 env = " ".join("=".join(pair) for pair in self.env.iteritems())
mbligh34faa282008-01-16 17:44:49 +0000354 full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
355 env, utils.sh_escape(command))
356 result = utils.run(full_cmd, timeout, True, stdout, stderr)
357 if result.exit_status == 255: # ssh's exit status for timeout
358 if re.match(r'^ssh: connect to host .* port .*: ' +
359 r'Connection timed out\r$', result.stderr):
360 raise AutoservSSHTimeout("ssh timed out",
361 result)
362 if not ignore_status and result.exit_status > 0:
363 raise AutoservRunError("command execution error",
364 result)
mblighdcd57a82007-07-11 23:06:47 +0000365 return result
mbligh7d2bde82007-08-02 16:26:10 +0000366
367
mbligh78669ff2008-01-10 16:33:07 +0000368 def run_grep(self, command, timeout=30, ignore_status=False,
369 stdout_ok_regexp=None, stdout_err_regexp=None,
370 stderr_ok_regexp=None, stderr_err_regexp=None,
371 connect_timeout=30):
372 """
373 Run a command on the remote host and look for regexp
374 in stdout or stderr to determine if the command was
375 successul or not.
mbligh6a2a2df2008-01-16 17:41:55 +0000376
mbligh78669ff2008-01-10 16:33:07 +0000377 Args:
378 command: the command line string
mbligh6a2a2df2008-01-16 17:41:55 +0000379 timeout: time limit in seconds before attempting to
mbligh78669ff2008-01-10 16:33:07 +0000380 kill the running process. The run() function
381 will take a few seconds longer than 'timeout'
382 to complete if it has to kill the process.
mbligh6a2a2df2008-01-16 17:41:55 +0000383 ignore_status: do not raise an exception, no matter
mbligh78669ff2008-01-10 16:33:07 +0000384 what the exit code of the command is.
385 stdout_ok_regexp: regexp that should be in stdout
386 if the command was successul.
387 stdout_err_regexp: regexp that should be in stdout
388 if the command failed.
389 stderr_ok_regexp: regexp that should be in stderr
390 if the command was successul.
391 stderr_err_regexp: regexp that should be in stderr
392 if the command failed.
mbligh6a2a2df2008-01-16 17:41:55 +0000393
mbligh78669ff2008-01-10 16:33:07 +0000394 Returns:
395 if the command was successul, raises an exception
396 otherwise.
mbligh6a2a2df2008-01-16 17:41:55 +0000397
mbligh78669ff2008-01-10 16:33:07 +0000398 Raises:
399 AutoservRunError:
400 - the exit code of the command execution was not 0.
mbligh6a2a2df2008-01-16 17:41:55 +0000401 - If stderr_err_regexp is found in stderr,
402 - If stdout_err_regexp is found in stdout,
mbligh78669ff2008-01-10 16:33:07 +0000403 - If stderr_ok_regexp is not found in stderr.
404 - If stdout_ok_regexp is not found in stdout,
405 """
406
407 # We ignore the status, because we will handle it at the end.
408 result = self.run(command, timeout, ignore_status=True,
mbligh6a2a2df2008-01-16 17:41:55 +0000409 connect_timeout=connect_timeout)
mbligh78669ff2008-01-10 16:33:07 +0000410
411 # Look for the patterns, in order
412 for (regexp, stream) in ((stderr_err_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000413 (stdout_err_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000414 if regexp and stream:
415 err_re = re.compile (regexp)
416 if err_re.search(stream):
mbligh6a2a2df2008-01-16 17:41:55 +0000417 raise AutoservRunError(
418 '%s failed, found error pattern: '
419 '"%s"' % (command, regexp), result)
mbligh78669ff2008-01-10 16:33:07 +0000420
421 for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
mbligh6a2a2df2008-01-16 17:41:55 +0000422 (stdout_ok_regexp, result.stdout)):
mbligh78669ff2008-01-10 16:33:07 +0000423 if regexp and stream:
424 ok_re = re.compile (regexp)
425 if ok_re.search(stream):
426 if ok_re.search(stream):
427 return
428
429 if not ignore_status and result.exit_status > 0:
mbligh6a2a2df2008-01-16 17:41:55 +0000430 raise AutoservRunError("command execution error",
431 result)
mbligh78669ff2008-01-10 16:33:07 +0000432
433
mbligh80d20772007-10-29 17:10:10 +0000434 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
435 kernel_args=None, wait=True):
mbligh7d2bde82007-08-02 16:26:10 +0000436 """
437 Reboot the remote host.
mbligh8b85dfb2007-08-28 09:50:31 +0000438
mbligha0452c82007-08-08 20:24:57 +0000439 Args:
440 timeout
mbligh8b85dfb2007-08-28 09:50:31 +0000441 """
mbligh33ae0902007-11-24 19:27:08 +0000442 self.reboot_setup()
443
mblighde384372007-10-17 04:25:37 +0000444 # forcibly include the "netconsole" kernel arg
445 if self.__netconsole_param:
446 if kernel_args is None:
447 kernel_args = self.__netconsole_param
448 else:
449 kernel_args += " " + self.__netconsole_param
450 # unload the (possibly loaded) module to avoid shutdown issues
451 self.__unload_netconsole_module()
mbligha0452c82007-08-08 20:24:57 +0000452 if label or kernel_args:
453 self.bootloader.install_boottool()
454 if label:
455 self.bootloader.set_default(label)
456 if kernel_args:
457 if not label:
458 default = int(self.bootloader.get_default())
459 label = self.bootloader.get_titles()[default]
460 self.bootloader.add_args(label, kernel_args)
mblighd742a222007-09-30 01:27:06 +0000461 print "Reboot: initiating reboot"
mbligh30270302007-11-05 20:33:52 +0000462 self.__record("GOOD", None, "reboot.start")
mblighcf3d83a2007-11-05 19:21:39 +0000463 try:
mblighf3b78932007-11-07 16:52:47 +0000464 self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
mbligh03f4fc72007-11-29 20:56:14 +0000465 except AutoservRunError:
mblighf3b78932007-11-07 16:52:47 +0000466 self.__record("ABORT", None, "reboot.start",
467 "reboot command failed")
mblighcf3d83a2007-11-05 19:21:39 +0000468 raise
mbligha0452c82007-08-08 20:24:57 +0000469 if wait:
mbligh5deff3d2008-01-04 21:21:28 +0000470 self.wait_for_restart(timeout)
mblighde384372007-10-17 04:25:37 +0000471 self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000472
mbligh7d2bde82007-08-02 16:26:10 +0000473
mblighdcd57a82007-07-11 23:06:47 +0000474 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000475 """
476 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000477
478 Directories will be copied recursively.
479 If a source component is a directory with a trailing slash,
480 the content of the directory will be copied, otherwise, the
481 directory itself and its content will be copied. This
482 behavior is similar to that of the program 'rsync'.
483
484 Args:
485 source: either
486 1) a single file or directory, as a string
487 2) a list of one or more (possibly mixed)
488 files or directories
489 dest: a file or a directory (if source contains a
490 directory or more than one element, you must
491 supply a directory dest)
492
493 Raises:
494 AutoservRunError: the scp command failed
495 """
496 if isinstance(source, types.StringTypes):
497 source= [source]
498
499 processed_source= []
500 for entry in source:
501 if entry.endswith('/'):
502 format_string= '%s@%s:"%s*"'
503 else:
504 format_string= '%s@%s:"%s"'
505 entry= format_string % (self.user, self.hostname,
506 utils.scp_remote_escape(entry))
507 processed_source.append(entry)
508
509 processed_dest= os.path.abspath(dest)
510 if os.path.isdir(dest):
511 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
512 else:
513 processed_dest= utils.sh_escape(processed_dest)
514
mbligh37d53c32008-01-14 16:16:00 +0000515 try:
516 utils.run('rsync --rsh="%s" -az %s %s' % (
517 self.SSH_BASE_COMMAND, ' '.join(processed_source),
518 processed_dest))
519 except:
520 utils.run('scp -rpq %s "%s"' % (
521 " ".join(processed_source),
522 processed_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000523
524
mblighdcd57a82007-07-11 23:06:47 +0000525 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000526 """
527 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000528
529 Directories will be copied recursively.
530 If a source component is a directory with a trailing slash,
531 the content of the directory will be copied, otherwise, the
532 directory itself and its content will be copied. This
533 behavior is similar to that of the program 'rsync'.
534
535 Args:
536 source: either
537 1) a single file or directory, as a string
538 2) a list of one or more (possibly mixed)
539 files or directories
540 dest: a file or a directory (if source contains a
541 directory or more than one element, you must
542 supply a directory dest)
543
544 Raises:
545 AutoservRunError: the scp command failed
546 """
547 if isinstance(source, types.StringTypes):
548 source= [source]
549
550 processed_source= []
551 for entry in source:
552 if entry.endswith('/'):
553 format_string= '"%s/"*'
554 else:
555 format_string= '"%s"'
556 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
557 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000558
mbligh0faf91f2007-10-18 03:10:48 +0000559 remote_dest = '%s@%s:"%s"' % (
560 self.user, self.hostname,
561 utils.scp_remote_escape(dest))
mbligh37d53c32008-01-14 16:16:00 +0000562 try:
mbligh74a7c842008-01-16 17:43:24 +0000563 utils.run('rsync --force --rsh="%s" -az %s %s' % (
mblighfa971602008-01-03 01:57:20 +0000564 self.ssh_base_command(), " ".join(processed_source),
mbligh0faf91f2007-10-18 03:10:48 +0000565 remote_dest))
mbligh37d53c32008-01-14 16:16:00 +0000566 except:
mbligh0faf91f2007-10-18 03:10:48 +0000567 utils.run('scp -rpq %s %s' % (
568 " ".join(processed_source),
569 remote_dest))
mblighc42141f2007-11-05 20:25:46 +0000570 self.run('find "%s" -type d | xargs -r chmod o+rx' % dest)
571 self.run('find "%s" -type f | xargs -r chmod o+r' % dest)
mbligh7d2bde82007-08-02 16:26:10 +0000572
mblighdcd57a82007-07-11 23:06:47 +0000573 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000574 """
575 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000576 for temporary file storage.
577
578 The directory and its content will be deleted automatically
579 on the destruction of the Host object that was used to obtain
580 it.
581 """
mbligha25b29e2007-08-26 13:58:04 +0000582 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000583 self.tmp_dirs.append(dir_name)
584 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000585
586
mblighdcd57a82007-07-11 23:06:47 +0000587 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000588 """
589 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000590
591 Returns:
592 True if the remote host is up, False otherwise
593 """
594 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000595 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000596 except:
mblighdcd57a82007-07-11 23:06:47 +0000597 return False
mbligheadfbb12007-11-26 23:03:12 +0000598 return True
mbligh7d2bde82007-08-02 16:26:10 +0000599
mbligh7d2bde82007-08-02 16:26:10 +0000600
mblighdcd57a82007-07-11 23:06:47 +0000601 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000602 """
603 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000604
605 In fact, it will wait until an ssh connection to the remote
606 host can be established.
607
608 Args:
609 timeout: time limit in seconds before returning even
610 if the host is not up.
611
612 Returns:
613 True if the host was found to be up, False otherwise
614 """
615 if timeout:
616 end_time= time.time() + timeout
617
618 while not timeout or time.time() < end_time:
619 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000620 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000621 except:
mblighdcd57a82007-07-11 23:06:47 +0000622 pass
623 else:
mbligheadfbb12007-11-26 23:03:12 +0000624 return True
mblighdcd57a82007-07-11 23:06:47 +0000625 time.sleep(1)
626
627 return False
mbligh7d2bde82007-08-02 16:26:10 +0000628
629
mblighdcd57a82007-07-11 23:06:47 +0000630 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000631 """
632 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000633
634 In fact, it will wait until an ssh connection to the remote
635 host fails.
636
637 Args:
638 timeout: time limit in seconds before returning even
639 if the host is not up.
640
641 Returns:
642 True if the host was found to be down, False otherwise
643 """
644 if timeout:
645 end_time= time.time() + timeout
646
647 while not timeout or time.time() < end_time:
648 try:
mbligh4cfa76a2007-11-26 20:45:16 +0000649 self.ssh_ping()
mbligheadfbb12007-11-26 23:03:12 +0000650 except:
mblighdcd57a82007-07-11 23:06:47 +0000651 return True
mblighdcd57a82007-07-11 23:06:47 +0000652 time.sleep(1)
653
654 return False
mbligh7d2bde82007-08-02 16:26:10 +0000655
656
mblighdbe4a382007-07-26 19:41:28 +0000657 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000658 """
659 Ensure the host is up if it is not then do not proceed;
660 this prevents cacading failures of tests
661 """
mbligha0452c82007-08-08 20:24:57 +0000662 print 'Ensuring that %s is up before continuing' % self.hostname
663 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000664 print "Performing a hardreset on %s" % self.hostname
mbligh4ba0b462007-11-05 23:05:40 +0000665 try:
666 self.hardreset()
mbligh03f4fc72007-11-29 20:56:14 +0000667 except AutoservUnsupportedError:
mbligh4ba0b462007-11-05 23:05:40 +0000668 print "Hardreset is unsupported on %s" % self.hostname
mbligha9563b92007-10-25 14:45:56 +0000669 if not self.wait_up(60 * 30):
670 # 30 minutes should be more than enough
mbligh03f4fc72007-11-29 20:56:14 +0000671 raise AutoservHostError
mbligha0452c82007-08-08 20:24:57 +0000672 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000673
674
mblighdcd57a82007-07-11 23:06:47 +0000675 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000676 """
677 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000678 /proc/cpuinfo.
679
680 Returns:
681 The number of CPUs
682 """
683
mbligh5f876ad2007-10-12 23:59:53 +0000684 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000685 cpus = 0
686 for line in proc_cpuinfo.splitlines():
687 if line.startswith('processor'):
688 cpus += 1
689 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000690
691
692 def check_uptime(self):
693 """
694 Check that uptime is available and monotonically increasing.
695 """
696 if not self.ping():
mbligh4d6feff2008-01-14 16:48:56 +0000697 raise AutoservHostError('Client is not pingable')
mbligh5f876ad2007-10-12 23:59:53 +0000698 result = self.run("/bin/cat /proc/uptime", 30)
699 return result.stdout.strip().split()[0]
700
701
702 def get_arch(self):
703 """
704 Get the hardware architecture of the remote machine
705 """
706 arch = self.run('/bin/uname -m').stdout.rstrip()
707 if re.match(r'i\d86$', arch):
708 arch = 'i386'
709 return arch
710
711
712 def get_kernel_ver(self):
713 """
714 Get the kernel version of the remote machine
715 """
716 return self.run('/bin/uname -r').stdout.rstrip()
717
718
719 def get_cmdline(self):
720 """
721 Get the kernel command line of the remote machine
722 """
723 return self.run('cat /proc/cmdline').stdout.rstrip()
724
725
726 def ping(self):
727 """
728 Ping the remote system, and return whether it's available
729 """
730 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
731 rc = utils.system(fpingcmd, ignore_status = 1)
732 return (rc == 0)
mblighd2e46052007-11-05 18:31:00 +0000733
mblighf014ff42007-11-26 21:33:11 +0000734
mbligh4cfa76a2007-11-26 20:45:16 +0000735 def ssh_ping(self, timeout = 60):
mblighfa971602008-01-03 01:57:20 +0000736 self.run('true', connect_timeout = timeout)
mblighda13d542008-01-03 16:28:34 +0000737
738
739 def get_autodir(self):
740 return self.autodir