blob: c4de29e409c2dab70256f8212ab333a51b04f95b [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh03f4fc72007-11-29 20:56:14 +000022import base_classes, utils, bootloader
23
24from common.error import *
mblighdcd57a82007-07-11 23:06:47 +000025
26
27class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000028 """
29 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000030 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000031
mblighdcd57a82007-07-11 23:06:47 +000032 It is not the machine autoserv is running on. The machine must be
33 configured for password-less login, for example through public key
34 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000035
mbligh3409ee72007-10-16 23:58:33 +000036 It includes support for controlling the machine through a serial
37 console on which you can run programs. If such a serial console is
38 set up on the machine then capabilities such as hard reset and
39 boot strap monitoring are available. If the machine does not have a
40 serial console available then ordinary SSH-based commands will
41 still be available, but attempts to use extensions such as
42 console logging or hard reset will fail silently.
43
mblighdcd57a82007-07-11 23:06:47 +000044 Implementation details:
45 This is a leaf class in an abstract class hierarchy, it must
46 implement the unimplemented methods in parent classes.
47 """
mbligh7d2bde82007-08-02 16:26:10 +000048
mbligh31a49de2007-11-05 18:41:19 +000049 DEFAULT_REBOOT_TIMEOUT = 1800
50 job = None
mbligh0faf91f2007-10-18 03:10:48 +000051
def __init__(self, hostname, user="root", port=22, initialize=True,
             conmux_log="console.log", conmux_warnings="status.log",
             conmux_server=None, conmux_attach=None,
             netconsole_log=None, netconsole_port=6666, autodir=None):
    """
    Construct a SSHHost object

    Args:
        hostname: network hostname or address of remote machine
        user: user to log in as on the remote machine
        port: port the ssh daemon is listening on on the remote
            machine
        initialize: stored as self.initialize; not used in this
            method
        conmux_log: file the console logger appends to, or None
            to skip console logging
        conmux_warnings: file the warning monitor appends to, or
            None to skip warning logging
        conmux_server: optional conmux server name, prefixed to
            the hostname when attaching to the console
        conmux_attach: path of the conmux-attach program; when
            not given it is derived from self.serverdir
        netconsole_log: file netconsole output is appended to;
            netconsole is only set up when this is given
        netconsole_port: UDP port used for netconsole
        autodir: stored as self.autodir (see get_autodir)
    """
    # these attributes are assigned before the parent initializer runs
    self.hostname = hostname
    self.user = user
    self.port = port
    self.tmp_dirs = []
    self.initialize = initialize
    self.autodir = autodir

    super(SSHHost, self).__init__()

    # console (conmux) support
    self.conmux_server = conmux_server
    if not conmux_attach:
        # self.serverdir is not set in this method; presumably the
        # parent initializer provides it
        conmux_attach = os.path.abspath(os.path.join(
            self.serverdir, '..', 'conmux', 'conmux-attach'))
    self.conmux_attach = conmux_attach
    self.logger_pid = None
    self.__start_console_log(conmux_log)
    self.warning_pid = None
    self.__start_warning_log(conmux_warnings)

    self.bootloader = bootloader.Bootloader(self)

    # netconsole support, only enabled when a log file was requested
    self.__netconsole_param = ""
    self.netlogger_pid = None
    if not netconsole_log:
        return
    self.__init_netconsole_params(netconsole_port)
    self.__start_netconsole_log(netconsole_log, netconsole_port)
    self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000094
mbligh7d2bde82007-08-02 16:26:10 +000095
def __del__(self):
    """
    Destroy a SSHHost object

    Removes every temporary directory recorded in self.tmp_dirs from
    the remote host, then sends SIGTERM to the console logger, the
    netconsole logger and the warning logger if they were started.
    Failures during cleanup are ignored.
    """
    # __init__ may have failed before tmp_dirs was assigned, so it is
    # looked up defensively, like the pid attributes below
    for tmp_dir in getattr(self, 'tmp_dirs', []):
        try:
            self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
        except AutoservRunError:
            pass
    # kill the console logger (it runs in its own process group)
    if getattr(self, 'logger_pid', None):
        try:
            pgid = os.getpgid(self.logger_pid)
            os.killpg(pgid, signal.SIGTERM)
        except OSError:
            pass
    # kill the netconsole logger
    if getattr(self, 'netlogger_pid', None):
        self.__unload_netconsole_module()
        try:
            os.kill(self.netlogger_pid, signal.SIGTERM)
        except OSError:
            pass
    # kill the warning logger (it runs in its own process group)
    if getattr(self, 'warning_pid', None):
        try:
            pgid = os.getpgid(self.warning_pid)
            os.killpg(pgid, signal.SIGTERM)
        except OSError:
            pass
mblighde384372007-10-17 04:25:37 +0000126
127
def __init_netconsole_params(self, port):
    """
    Connect to the remote machine and determine the values to use for the
    required netconsole parameters.

    On success the resulting "netconsole=..." string is stored in
    self.__netconsole_param. If any step fails (a remote command
    fails, or its output cannot be parsed) the method returns early
    and leaves the parameter untouched.

    Args:
        port: port number placed in the netconsole parameter
    """
    # PROBLEM: on machines with multiple IPs this may not make any sense
    # It also doesn't work with IPv6
    remote_ip = socket.gethostbyname(self.hostname)
    local_ip = socket.gethostbyname(socket.gethostname())
    # Get the gateway of the remote machine: the first address in the
    # first line of traceroute output
    try:
        traceroute = self.run('traceroute -n %s' % local_ip)
    except AutoservRunError:
        return
    first_node = traceroute.stdout.split("\n")[0]
    match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
    if not match:
        return
    router_ip = match.group(1)
    # Look up the MAC address of the gateway; the ping is issued first,
    # presumably so the remote ARP cache holds an entry for it
    try:
        self.run('ping -c 1 %s' % router_ip)
        arp = self.run('arp -n -a %s' % router_ip)
    except AutoservRunError:
        return
    # arp implementations differ in the case they print MAC addresses
    # in, so accept both upper and lower case hex digits
    match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+',
                      arp.stdout, re.IGNORECASE)
    if not match:
        return
    gateway_mac = match.group(1)
    self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
                                                            port,
                                                            local_ip,
                                                            gateway_mac)
163
164
def __start_netconsole_log(self, logfilename, port):
    """
    Log the output of netconsole to a specified file

    Starts an "nc" UDP listener on the given port with its stdout
    appended to logfilename, and remembers its pid in
    self.netlogger_pid. Does nothing when logfilename is None.
    """
    if logfilename is None:
        return
    listener_cmd = ['nc', '-u', '-l', '-p', str(port)]
    # unbuffered append, so output reaches the file as it arrives
    sink = open(logfilename, "a", 0)
    listener = subprocess.Popen(listener_cmd, stdout=sink)
    self.netlogger_pid = listener.pid
174
175
def __load_netconsole_module(self):
    """
    Make a best effort to load the netconsole module.

    Nothing is done when no netconsole parameter string has been
    determined.

    Note that loading the module can fail even when the remote machine is
    working correctly if netconsole is already compiled into the kernel
    and started.
    """
    params = self.__netconsole_param
    if params:
        try:
            self.run('modprobe netconsole %s' % params)
        except AutoservRunError:
            # if it fails there isn't much we can do, just keep going
            pass
191
192
def __unload_netconsole_module(self):
    """
    Make a best effort to unload the netconsole module on the remote
    machine; a failing modprobe is ignored.
    """
    unload_cmd = 'modprobe -r netconsole'
    try:
        self.run(unload_cmd)
    except AutoservRunError:
        pass
mbligh3409ee72007-10-16 23:58:33 +0000198
199
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
    """
    Wait for the remote host to go down and come back up again.

    Args:
        timeout: time limit in seconds passed to each wait_up()
            call

    Raises:
        AutoservRebootError: the host did not go down within 300
            seconds, or did not come back up
    """
    went_down = self.wait_down(300) # Make sure he's dead, Jim
    if not went_down:
        self.__record("ABORT", None, "reboot.verify", "shutdown failed")
        raise AutoservRebootError("Host did not shut down")

    # the host is checked twice with a short pause in between; only
    # the second check decides the outcome
    self.wait_up(timeout)
    time.sleep(2) # this is needed for complete reliability
    came_back = self.wait_up(timeout)
    if not came_back:
        self.__record("ABORT", None, "reboot.verify", "bringup failed")
        raise AutoservRebootError("Host did not return from reboot")

    self.__record("GOOD", None, "reboot.verify")
    print("Reboot complete")
212
213
def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
    """
    Reach out and slap the box in the power switch

    Sends the hard reset escape sequence through the console and
    optionally waits for the host to restart.

    Args:
        timeout: time limit in seconds passed to
            wait_for_restart()
        wait: when true, block until the host has restarted

    Raises:
        AutoservUnsupportedError: the console command could not be
            run, so hard reset is unavailable
    """
    if not self.__console_run(r"'~$hardreset'"):
        self.__record("ABORT", None, "reboot.start", "hard reset unavailable")
        raise AutoservUnsupportedError('Hard reset unavailable')

    # record the start before waiting, so that "reboot.start" precedes
    # the "reboot.verify" entry written by wait_for_restart(), as it
    # does in reboot()
    self.__record("GOOD", None, "reboot.start", "hard reset")
    if wait:
        self.wait_for_restart(timeout)
mbligh3409ee72007-10-16 23:58:33 +0000225
226
def __conmux_hostname(self):
    """
    Return the name used to address this host through conmux:
    "<conmux_server>/<hostname>" when a conmux server is set, the
    plain hostname otherwise.
    """
    if not self.conmux_server:
        return self.hostname
    return '%s/%s' % (self.conmux_server, self.hostname)
232
233
def __start_console_log(self, logfilename):
    """
    Log the output of the console session to a specified file

    Does nothing when logfilename is None or when the conmux-attach
    program is not set or does not exist. Otherwise the logger is
    started in its own process group and its pid is stored in
    self.logger_pid.
    """
    if logfilename is None:
        return
    attach = self.conmux_attach
    if not (attach and os.path.exists(attach)):
        return

    def own_process_group():
        # lets the whole logger group be signalled later
        os.setpgid(0, 0)

    logger_cmd = [attach,
                  self.__conmux_hostname(),
                  'cat - >> %s' % logfilename]
    devnull = open('/dev/null', 'w')
    logger = subprocess.Popen(logger_cmd,
                              stderr=devnull,
                              preexec_fn=own_process_group)
    self.logger_pid = logger.pid
247
248
def __start_warning_log(self, logfilename):
    """
    Log the output of the warning monitor to a specified file

    Does nothing when logfilename is None, when there is no "debug"
    directory in the current working directory, or when the
    conmux-attach program is not set or does not exist. Otherwise
    the monitor is started in its own process group, with its
    stderr appended to debug/conmux.log, and its pid is stored in
    self.warning_pid.
    """
    if logfilename is None or not os.path.isdir('debug'):
        return
    # same guard as the other console helpers: without conmux-attach
    # the Popen below would raise OSError instead of quietly skipping
    if not self.conmux_attach or not os.path.exists(self.conmux_attach):
        return
    script_path = os.path.join(self.serverdir, 'warning_monitor')
    script_cmd = 'expect %s %s >> %s' % (script_path,
                                         self.hostname,
                                         logfilename)
    cmd = [self.conmux_attach, self.__conmux_hostname(), script_cmd]
    logger = subprocess.Popen(cmd,
                              stderr=open('debug/conmux.log', 'a', 0),
                              preexec_fn=lambda: os.setpgid(0, 0))
    self.warning_pid = logger.pid
266
267
def __console_run(self, cmd):
    """
    Send a command to the conmux session

    Returns:
        True if the conmux-attach invocation exited with status 0,
        False otherwise (including when conmux-attach is not set
        or does not exist)
    """
    attach = self.conmux_attach
    if not (attach and os.path.exists(attach)):
        return False
    shell_cmd = '%s %s echo %s 2> /dev/null' % (attach,
                                                self.__conmux_hostname(),
                                                cmd)
    exit_code = utils.system(shell_cmd, ignore_status=True)
    return exit_code == 0
mbligh7d2bde82007-08-02 16:26:10 +0000279
280
def __record(self, status_code, subdir, operation, status = ''):
    """
    Record a status entry.

    The entry is handed to self.job.record() when a job is set;
    otherwise it is written to stderr as one tab-separated line,
    with "----" standing in for an empty subdir.
    """
    if self.job:
        self.job.record(status_code, subdir, operation, status)
        return
    fields = (status_code, subdir or "----", operation, status)
    sys.stderr.write("%s\t%s\t%s\t%s" % fields + "\n")
289
290
def ssh_base_command(self, connect_timeout=30):
    """
    Construct the host-independent part of the ssh command line.

    Args:
        connect_timeout: ssh ConnectTimeout in seconds; must be a
            positive integer

    Returns:
        the ssh command prefix as a string
    """
    assert isinstance(connect_timeout, (int, long))
    assert connect_timeout > 0 # can't disable the timeout
    template = '/usr/bin/ssh -a -x -o ' + \
               'BatchMode=yes -o ConnectTimeout=%d'
    return template % connect_timeout
297
298
def ssh_command(self, connect_timeout=30):
    """
    Construct an ssh command with proper args for this host.

    Args:
        connect_timeout: passed on to ssh_base_command()

    Returns:
        the base ssh command followed by the login user, port and
        hostname of this host
    """
    base = self.ssh_base_command(connect_timeout)
    host_args = (base, self.user, self.port, self.hostname)
    return r'%s -l %s -p %d %s' % host_args
mblighe6647d12007-10-17 00:00:01 +0000306
307
def run(self, command, timeout=3600, ignore_status=False,
        stdout_tee=None, stderr_tee=None, connect_timeout=30):
    """
    Run a command on the remote host.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.
        stdout_tee: object handed to utils.run for the command's
            stdout; sys.stdout when not given
        stderr_tee: object handed to utils.run for the command's
            stderr; sys.stderr when not given
        connect_timeout: ssh connection timeout in seconds

    Returns:
        a hosts.base_classes.CmdResult object

    Raises:
        AutoservRunError: the exit code of the command
            execution was greater than 0 and ignore_status
            was not set
        AutoservSSHTimeout: ssh exited with status 255 and
            reported a connection timeout (raised even when
            ignore_status is set)
    """
    out_stream = stdout_tee or sys.stdout
    err_stream = stderr_tee or sys.stderr
    print("ssh: %s" % (command,))
    # environment assignments are prepended to the remote command
    assignments = ["=".join(item) for item in self.env.iteritems()]
    env = " ".join(assignments)
    full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
                               env, utils.sh_escape(command))
    result = utils.run(full_cmd, timeout, True, out_stream, err_stream)

    timeout_pattern = (r'^ssh: connect to host .* port .*: ' +
                       r'Connection timed out\r$')
    # 255 is ssh's exit status for timeout
    if result.exit_status == 255 and re.match(timeout_pattern,
                                              result.stderr):
        raise AutoservSSHTimeout("ssh timed out", result)
    if result.exit_status > 0 and not ignore_status:
        raise AutoservRunError("command execution error", result)
    return result
mbligh7d2bde82007-08-02 16:26:10 +0000345
346
def run_grep(self, command, timeout=30, ignore_status=False,
             stdout_ok_regexp=None, stdout_err_regexp=None,
             stderr_ok_regexp=None, stderr_err_regexp=None,
             connect_timeout=30):
    """
    Run a command on the remote host and look for regexp
    in stdout or stderr to determine if the command was
    successful or not.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.
        stdout_ok_regexp: regexp that should be in stdout
            if the command was successful.
        stdout_err_regexp: regexp that should be in stdout
            if the command failed.
        stderr_ok_regexp: regexp that should be in stderr
            if the command was successful.
        stderr_err_regexp: regexp that should be in stderr
            if the command failed.
        connect_timeout: ssh connection timeout in seconds,
            passed on to run()

    Returns:
        None if the command was successful, raises an exception
        otherwise.

    Raises:
        AutoservRunError:
            - if stderr_err_regexp is found in stderr,
            - if stdout_err_regexp is found in stdout,
            - if neither ok regexp is found, the exit code of
              the command was greater than 0 and ignore_status
              was not set.
    """

    # We ignore the status, because we will handle it at the end.
    result = self.run(command, timeout, ignore_status=True,
                      connect_timeout=connect_timeout)

    # Look for the patterns, in order: error patterns take
    # precedence over success patterns
    for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                             (stdout_err_regexp, result.stdout)):
        if regexp and stream:
            if re.search(regexp, stream):
                raise AutoservRunError(
                    '%s failed, found error pattern: '
                    '"%s"' % (command, regexp), result)

    # A matching success pattern overrides the exit status
    for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                             (stdout_ok_regexp, result.stdout)):
        if regexp and stream:
            if re.search(regexp, stream):
                return

    if not ignore_status and result.exit_status > 0:
        raise AutoservRunError("command execution error",
                               result)
mbligh78669ff2008-01-10 16:33:07 +0000411
412
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
           kernel_args=None, wait=True):
    """
    Reboot the remote host.

    Args:
        timeout: time limit in seconds passed to
            wait_for_restart()
        label: bootloader entry to make the default before
            rebooting, if given
        kernel_args: extra kernel arguments added to the entry
            being booted; the netconsole parameter is
            appended when one has been determined
        wait: when true, block until the host has restarted

    Raises:
        AutoservRunError: the reboot command failed
    """
    self.reboot_setup()

    # forcibly include the "netconsole" kernel arg
    netconsole_arg = self.__netconsole_param
    if netconsole_arg:
        if kernel_args is None:
            kernel_args = netconsole_arg
        else:
            kernel_args = kernel_args + " " + netconsole_arg
        # unload the (possibly loaded) module to avoid shutdown issues
        self.__unload_netconsole_module()

    if label or kernel_args:
        self.bootloader.install_boottool()
        if label:
            self.bootloader.set_default(label)
        if kernel_args:
            if not label:
                # no label given: use the current default entry
                default_index = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default_index]
            self.bootloader.add_args(label, kernel_args)

    print("Reboot: initiating reboot")
    self.__record("GOOD", None, "reboot.start")
    try:
        # delayed and run in the background so the ssh session can end
        # before the machine goes down
        self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
    except AutoservRunError:
        self.__record("ABORT", None, "reboot.start",
                      "reboot command failed")
        raise

    if not wait:
        return
    self.wait_for_restart(timeout)
    self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000451
mbligh7d2bde82007-08-02 16:26:10 +0000452
def __copy_files(self, sources, dest):
    """
    Copy files from one machine to another.

    This is for internal use by other methods that intend to move
    files between machines. It expects a list of source files and
    a destination (a filename if the source is a single file, a
    directory otherwise). The names must already be
    pre-processed into the appropriate rsync/scp friendly
    format (%s@%s:%s).

    rsync is tried first; if it fails for any reason the copy is
    retried with scp.
    """
    # wait until there are only a small number of copies running
    # before starting this one
    MAXIMUM_SIMULTANEOUS_COPIES = 4
    while True:
        copy_count = 0
        # NOTE(review): utils.system_output is assumed to return the
        # command output as one string, so it is split into lines here;
        # iterating the string directly would visit single characters
        # and never find a match
        procs = utils.system_output('ps -ef')
        for line in procs.splitlines():
            if 'rsync ' in line or 'scp ' in line:
                copy_count += 1
        if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
            break
        time.sleep(60)

    try:
        utils.run('rsync --rsh="%s" -az %s %s' % (
            self.ssh_base_command(), ' '.join(sources), dest))
    except Exception:
        # fall back to scp, e.g. when rsync is not available
        utils.run('scp -rpq %s "%s"' % (
            ' '.join(sources), dest))
483
484
def get_file(self, source, dest):
    """
    Copy files from the remote host to a local path.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    # turn every remote path into the user@host:"path" form
    remote_paths = []
    for entry in source:
        # a trailing slash means "the content of this directory"
        if entry.endswith('/'):
            template = '%s@%s:"%s*"'
        else:
            template = '%s@%s:"%s"'
        remote_paths.append(template % (self.user, self.hostname,
                                        utils.scp_remote_escape(entry)))

    local_dest = utils.sh_escape(os.path.abspath(dest))
    if os.path.isdir(dest):
        # abspath() drops a trailing slash; put it back for directories
        local_dest = "%s/" % (local_dest,)

    self.__copy_files(remote_paths, local_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000527
528
def send_file(self, source, dest):
    """
    Copy files from a local path to the remote host.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    After the copy, everything under dest on the remote host is
    made readable by others (directories also searchable).

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    processed_source = []
    for entry in source:
        # a trailing slash means "the content of this directory";
        # abspath() drops it, so it is re-added with a glob
        if entry.endswith('/'):
            format_string = '"%s/"*'
        else:
            format_string = '"%s"'
        entry = format_string % (utils.sh_escape(os.path.abspath(entry)),)
        processed_source.append(entry)

    remote_dest = '%s@%s:"%s"' % (
        self.user, self.hostname,
        utils.scp_remote_escape(dest))

    self.__copy_files(processed_source, remote_dest)
    # dest is placed inside double quotes in the remote commands, so
    # escape it the same way the "rm -rf" cleanup in __del__ does
    escaped_dest = utils.sh_escape(dest)
    self.run('find "%s" -type d | xargs -r chmod o+rx' % escaped_dest)
    self.run('find "%s" -type f | xargs -r chmod o+r' % escaped_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000570
def get_tmp_dir(self):
    """
    Create a directory on the host suitable for temporary file
    storage and return its pathname.

    The directory is remembered in self.tmp_dirs; it and its
    content will be deleted automatically on the destruction of
    the Host object that was used to obtain it.
    """
    created = self.run("mktemp -d /tmp/autoserv-XXXXXX")
    dir_name = created.stdout.rstrip(" \n")
    self.tmp_dirs.append(dir_name)
    return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000583
584
def is_up(self):
    """
    Check if the remote host is up.

    The host counts as up when ssh_ping() completes without raising.

    Returns:
        True if the remote host is up, False otherwise
    """
    try:
        self.ssh_ping()
    except (KeyboardInterrupt, SystemExit):
        # never mistake an interrupt or interpreter exit for a dead host
        raise
    except Exception:
        return False
    return True
mbligh7d2bde82007-08-02 16:26:10 +0000597
mbligh7d2bde82007-08-02 16:26:10 +0000598
def wait_up(self, timeout=None):
    """
    Wait until the remote host is up or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host can be established.

    Args:
        timeout: time limit in seconds before returning even
            if the host is not up. Without a timeout (None
            or 0) the method waits indefinitely.

    Returns:
        True if the host was found to be up, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except (KeyboardInterrupt, SystemExit):
            # let the user (or the interpreter) stop the wait
            raise
        except Exception:
            pass
        else:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000626
627
def wait_down(self, timeout=None):
    """
    Wait until the remote host is down or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host fails.

    Args:
        timeout: time limit in seconds before returning even
            if the host is still up. Without a timeout (None
            or 0) the method waits indefinitely.

    Returns:
        True if the host was found to be down, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except (KeyboardInterrupt, SystemExit):
            # an interrupt says nothing about the state of the host
            raise
        except Exception:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000653
654
def ensure_up(self):
    """
    Ensure the host is up; if it is not then do not proceed.
    This prevents cascading failures of tests.

    If the host does not answer within 300 seconds a hard reset
    is attempted, then the host is given 30 minutes to come up.

    Raises:
        AutoservHostError: the host did not come up
    """
    print('Ensuring that %s is up before continuing' % self.hostname)
    if hasattr(self, 'hardreset'):
        if not self.wait_up(300):
            print("Performing a hardreset on %s" % self.hostname)
            try:
                self.hardreset()
            except AutoservUnsupportedError:
                print("Hardreset is unsupported on %s" % self.hostname)
    # 30 minutes should be more than enough
    host_is_up = self.wait_up(60 * 30)
    if not host_is_up:
        raise AutoservHostError
    print('Host up, continuing')
mbligh7d2bde82007-08-02 16:26:10 +0000671
672
def get_num_cpu(self):
    """
    Get the number of CPUs in the host according to
    /proc/cpuinfo.

    Returns:
        The number of CPUs, counted as the lines of
        /proc/cpuinfo that start with "processor"
    """
    cpuinfo = self.run("cat /proc/cpuinfo").stdout
    processor_lines = [entry for entry in cpuinfo.splitlines()
                       if entry.startswith('processor')]
    return len(processor_lines)
mbligh5f876ad2007-10-12 23:59:53 +0000688
689
def check_uptime(self):
    """
    Check that the host is pingable and read its uptime.

    Returns:
        the first field of /proc/uptime on the remote machine,
        as a string

    Raises:
        AutoservHostError: the host does not answer ping
    """
    pingable = self.ping()
    if not pingable:
        raise AutoservHostError('Client is not pingable')
    uptime_fields = self.run("/bin/cat /proc/uptime", 30).stdout.strip().split()
    return uptime_fields[0]
698
699
def get_arch(self):
    """
    Get the hardware architecture of the remote machine

    Returns:
        the output of "uname -m", with every iX86 variant
        reported as 'i386'
    """
    machine = self.run('/bin/uname -m').stdout.rstrip()
    if re.match(r'i\d86$', machine):
        return 'i386'
    return machine
708
709
def get_kernel_ver(self):
    """
    Get the kernel version of the remote machine

    Returns:
        the output of "uname -r" without trailing whitespace
    """
    uname = self.run('/bin/uname -r')
    return uname.stdout.rstrip()
715
716
def get_cmdline(self):
    """
    Get the kernel command line of the remote machine

    Returns:
        the content of /proc/cmdline without trailing whitespace
    """
    cmdline = self.run('cat /proc/cmdline')
    return cmdline.stdout.rstrip()
722
723
def ping(self):
    """
    Ping the remote system, and return whether it's available

    Returns:
        True if fping exited with status 0, False otherwise
    """
    fping_cmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
    exit_code = utils.system(fping_cmd, ignore_status=1)
    return exit_code == 0
mblighd2e46052007-11-05 18:31:00 +0000731
mblighf014ff42007-11-26 21:33:11 +0000732
def ssh_ping(self, timeout = 60):
    """
    Check that a command can be run on the remote host over ssh.

    Runs "true" on the host, using timeout both as the command
    time limit and as the ssh connection timeout. Any exception
    raised by run() is propagated.
    """
    limits = {'timeout': timeout, 'connect_timeout': timeout}
    self.run('true', **limits)
mblighda13d542008-01-03 16:28:34 +0000735
736
def get_autodir(self):
    """
    Return the autodir value this host was constructed with.
    """
    autodir = self.autodir
    return autodir