blob: 065ac646e633a7e81fd7154444ae1fc2a21f98cc [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighf5427bb2008-04-09 15:55:57 +000021import types, os, sys, signal, subprocess, time, re, socket, pdb
22
23from autotest_lib.client.common_lib import error
24from autotest_lib.server import utils
25import remote, bootloader
mblighdcd57a82007-07-11 23:06:47 +000026
27
mblighbda9c9c2008-04-08 17:45:00 +000028
mblighf5427bb2008-04-09 15:55:57 +000029class SSHHost(remote.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000030 """
31 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000032 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000033
mblighdcd57a82007-07-11 23:06:47 +000034 It is not the machine autoserv is running on. The machine must be
35 configured for password-less login, for example through public key
36 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000037
mbligh3409ee72007-10-16 23:58:33 +000038 It includes support for controlling the machine through a serial
39 console on which you can run programs. If such a serial console is
40 set up on the machine then capabilities such as hard reset and
41 boot strap monitoring are available. If the machine does not have a
42 serial console available then ordinary SSH-based commands will
43 still be available, but attempts to use extensions such as
44 console logging or hard reset will fail silently.
45
mblighdcd57a82007-07-11 23:06:47 +000046 Implementation details:
47 This is a leaf class in an abstract class hierarchy, it must
48 implement the unimplemented methods in parent classes.
49 """
mbligh7d2bde82007-08-02 16:26:10 +000050
mbligh31a49de2007-11-05 18:41:19 +000051 DEFAULT_REBOOT_TIMEOUT = 1800
52 job = None
mbligh0faf91f2007-10-18 03:10:48 +000053
def __init__(self, hostname, user="root", port=22, initialize=True,
             conmux_log="console.log",
             conmux_server=None, conmux_attach=None,
             netconsole_log=None, netconsole_port=6666, autodir=None):
    """
    Construct a SSHHost object

    Args:
        hostname: network hostname or address of remote machine
        user: user to log in as on the remote machine
        port: port the ssh daemon is listening on on the remote
            machine
        initialize: stored unchanged as self.initialize
        conmux_log: file the console session is logged to; None
            disables console logging
        conmux_server: optional conmux server name; when set it
            is prefixed to the hostname for conmux commands
        conmux_attach: path of the conmux-attach program; when
            not given, <serverdir>/../conmux/conmux-attach is used
        netconsole_log: file netconsole output is appended to;
            netconsole is only set up when this is given
        netconsole_port: port used for the netconsole listener
        autodir: value handed back by get_autodir()
    """
    self.hostname = hostname
    self.user = user
    self.port = port
    self.tmp_dirs = []
    self.initialize = initialize
    self.autodir = autodir

    super(SSHHost, self).__init__()

    self.conmux_server = conmux_server
    # fall back to the conmux-attach that sits next to the server dir
    self.conmux_attach = conmux_attach or os.path.abspath(
        os.path.join(self.serverdir, '..', 'conmux', 'conmux-attach'))
    self.logger_popen = None
    self.warning_stream = None
    self.__start_console_log(conmux_log)

    self.bootloader = bootloader.Bootloader(self)

    self.__netconsole_param = ""
    self.netlogger_popen = None
    if netconsole_log:
        self.__init_netconsole_params(netconsole_port)
        self.__start_netconsole_log(netconsole_log, netconsole_port)
        self.__load_netconsole_module()
mblighde384372007-10-17 04:25:37 +000095
mbligh7d2bde82007-08-02 16:26:10 +000096
@staticmethod
def __kill(popen):
    """
    Send SIGTERM to a child process unless it has already exited.

    Args:
        popen: process handle providing poll() and pid (e.g. a
            subprocess.Popen object)
    """
    if popen.poll() is not None:
        # poll() reported an exit status, nothing left to kill
        return
    try:
        os.kill(popen.pid, signal.SIGTERM)
    except OSError:
        # failing to deliver the signal is deliberately ignored
        pass
105
106
def __del__(self):
    """
    Destroy a SSHHost object

    Removes every directory recorded in self.tmp_dirs from the
    remote host, then stops the console logger and the netconsole
    logger if they were started.
    """
    for tmp_dir in self.tmp_dirs:
        try:
            self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
        except error.AutoservRunError:
            # a directory that cannot be removed is simply skipped
            pass

    # kill the console logger
    console_logger = getattr(self, 'logger_popen', None)
    if console_logger:
        self.__kill(console_logger)
        if self.job:
            self.job.warning_loggers.discard(self.warning_stream)
        self.warning_stream.close()

    # kill the netconsole logger
    net_logger = getattr(self, 'netlogger_popen', None)
    if net_logger:
        self.__unload_netconsole_module()
        self.__kill(net_logger)
mblighde384372007-10-17 04:25:37 +0000127
128
def __init_netconsole_params(self, port):
    """
    Connect to the remote machine and determine the values to use
    for the required netconsole parameters.

    On success self.__netconsole_param is set to a string of the
    form 'netconsole=@<remote ip>/,<port>@<local ip>/<gateway mac>'.
    When any probing step fails the method returns early and
    leaves the parameter untouched.

    Args:
        port: port number placed in the netconsole parameter
    """
    # PROBLEM: on machines with multiple IPs this may not make any sense
    # It also doesn't work with IPv6
    remote_ip = socket.gethostbyname(self.hostname)
    local_ip = socket.gethostbyname(socket.gethostname())

    # Get the gateway of the remote machine
    try:
        traceroute = self.run('traceroute -n %s' % local_ip)
    except error.AutoservRunError:
        return
    first_node = traceroute.stdout.split("\n")[0]
    match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
    if not match:
        return
    router_ip = match.group(1)

    # Look up the MAC address of the gateway
    try:
        # the ping is issued before the arp lookup, presumably so that
        # the gateway is present in the arp table - TODO confirm
        self.run('ping -c 1 %s' % router_ip)
        arp = self.run('arp -n -a %s' % router_ip)
    except error.AutoservRunError:
        return
    # the hex digits of a MAC address may be printed in either case,
    # so the match must not depend on upper-case output
    match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+',
                      arp.stdout, re.IGNORECASE)
    if not match:
        return
    gateway_mac = match.group(1)

    self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
                                                            port,
                                                            local_ip,
                                                            gateway_mac)
164
165
def __start_netconsole_log(self, logfilename, port):
    """
    Log the output of netconsole to a specified file

    Starts an 'nc' listener (UDP, on the given port) whose stdout
    is appended to logfilename and keeps its process handle in
    self.netlogger_popen. Nothing happens when logfilename is None.
    """
    if logfilename is None:
        return
    listener_cmd = ['nc', '-u', '-l', '-p', str(port)]
    # third argument 0: open the log unbuffered
    log_handle = open(logfilename, 'a', 0)
    self.netlogger_popen = subprocess.Popen(listener_cmd,
                                            stdout=log_handle)
mblighde384372007-10-17 04:25:37 +0000175
176
def __load_netconsole_module(self):
    """
    Make a best effort to load the netconsole module.

    Nothing is attempted when no netconsole parameter string was
    determined.

    Note that loading the module can fail even when the remote
    machine is working correctly if netconsole is already compiled
    into the kernel and started.
    """
    params = self.__netconsole_param
    if params:
        try:
            self.run('modprobe netconsole %s' % params)
        except error.AutoservRunError:
            # if it fails there isn't much we can do, just keep going
            pass
192
193
def __unload_netconsole_module(self):
    """
    Try to remove the netconsole module on the remote machine.

    A failing 'modprobe -r' is ignored.
    """
    unload_cmd = 'modprobe -r netconsole'
    try:
        self.run(unload_cmd)
    except error.AutoservRunError:
        # best effort only
        pass
mbligh3409ee72007-10-16 23:58:33 +0000199
200
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
    """
    Wait for the host to go down and to come back up again.

    The outcome is recorded under the "reboot.verify" operation.

    Args:
        timeout: time limit in seconds handed to each wait_up()
            call

    Raises:
        AutoservRebootError: the host did not go down within
            300 seconds, or did not come back up
    """
    went_down = self.wait_down(300)  # Make sure he's dead, Jim
    if not went_down:
        self.__record("ABORT", None, "reboot.verify", "shutdown failed")
        raise error.AutoservRebootError("Host did not shut down")

    # the result of this first wait is not inspected; the verdict comes
    # from the second wait_up() after the short pause
    self.wait_up(timeout)
    time.sleep(2)  # this is needed for complete reliability
    if not self.wait_up(timeout):
        self.__record("ABORT", None, "reboot.verify",
                      "Host did not return from reboot")
        raise error.AutoservRebootError("Host did not return from reboot")

    self.__record("GOOD", None, "reboot.verify")
    print("Reboot complete")
215
216
def hardreset(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True):
    """
    Reach out and slap the box in the power switch

    Sends the '~$hardreset' command through the conmux console.

    Args:
        timeout: time limit in seconds passed to
            wait_for_restart()
        wait: when true, block until the host has restarted

    Raises:
        AutoservUnsupportedError: the console command could not
            be issued
    """
    reset_issued = self.__console_run(r"'~$hardreset'")
    if not reset_issued:
        self.__record("ABORT", None, "reboot.start",
                      "hard reset unavailable")
        raise error.AutoservUnsupportedError('Hard reset unavailable')

    if wait:
        self.wait_for_restart(timeout)
    self.__record("GOOD", None, "reboot.start", "hard reset")
mbligh3409ee72007-10-16 23:58:33 +0000229
230
def __conmux_hostname(self):
    """
    Return the host name in the form used for conmux commands:
    '<conmux_server>/<hostname>' when a conmux server is
    configured, the bare hostname otherwise.
    """
    if not self.conmux_server:
        return self.hostname
    return '%s/%s' % (self.conmux_server, self.hostname)
236
237
def __start_console_log(self, logfilename):
    """
    Log the output of the console session to a specified file

    Starts conmux-attach with a command line that runs
    warning_monitor.py (from self.serverdir) with the log file name
    and the write end of a pipe. The read end of that pipe is kept
    as self.warning_stream and, when a job is attached, registered
    in job.warning_loggers. The process handle is stored in
    self.logger_popen.

    Nothing happens when logfilename is None or when the
    conmux-attach program is not configured or does not exist.
    """
    if logfilename is None:
        return
    if not self.conmux_attach or not os.path.exists(self.conmux_attach):
        return

    r, w = os.pipe()
    script_path = os.path.join(self.serverdir, 'warning_monitor.py')
    cmd = [self.conmux_attach, self.__conmux_hostname(),
           '%s %s %s %d' % (sys.executable, script_path,
                            logfilename, w)]
    dev_null = open(os.devnull, 'w')
    try:
        self.warning_stream = os.fdopen(r, 'r', 0)
        if self.job:
            self.job.warning_loggers.add(self.warning_stream)
        self.logger_popen = subprocess.Popen(cmd, stderr=dev_null)
    finally:
        # the child keeps its own copies of these descriptors; the
        # parent must release its copies even when Popen fails
        dev_null.close()
        os.close(w)
mblighe6c995f2007-10-26 19:43:01 +0000260
261
def __console_run(self, cmd):
    """
    Send a command to the conmux session

    Args:
        cmd: text that is echoed into conmux-attach

    Returns:
        True if the conmux-attach invocation exited with status
        0; False if it failed or if conmux-attach is not
        configured or does not exist
    """
    attach = self.conmux_attach
    if not (attach and os.path.exists(attach)):
        return False
    shell_cmd = '%s %s echo %s 2> /dev/null' % (attach,
                                                self.__conmux_hostname(),
                                                cmd)
    exit_status = utils.system(shell_cmd, ignore_status=True)
    return exit_status == 0
mbligh7d2bde82007-08-02 16:26:10 +0000273
274
def __record(self, status_code, subdir, operation, status=''):
    """
    Record a status line for an operation.

    With a job attached the call is forwarded to job.record();
    otherwise a tab separated line is written to sys.stderr, with
    "----" standing in for an empty subdir.
    """
    if self.job:
        self.job.record(status_code, subdir, operation, status)
        return
    fields = (status_code, subdir or "----", operation, status)
    sys.stderr.write("%s\t%s\t%s\t%s" % fields + "\n")
283
284
def ssh_base_command(self, connect_timeout=30):
    """
    Return the ssh command prefix shared by all connections: batch
    mode, the given connect timeout and a server alive interval of
    300 seconds. User, port and host are not part of it.

    Args:
        connect_timeout: positive integer, ssh ConnectTimeout in
            seconds
    """
    assert isinstance(connect_timeout, (int, long))
    assert connect_timeout > 0  # can't disable the timeout
    template = ('/usr/bin/ssh -a -x '
                '-o BatchMode=yes '
                '-o ConnectTimeout=%d '
                '-o ServerAliveInterval=300')
    return template % connect_timeout
292
293
def ssh_command(self, connect_timeout=30):
    """
    Construct an ssh command with proper args for this host.

    Appends the login user, port and hostname of this host to the
    result of ssh_base_command(connect_timeout).
    """
    pieces = (self.ssh_base_command(connect_timeout),
              self.user,
              self.port,
              self.hostname)
    return r'%s -l %s -p %d %s' % pieces
mblighe6647d12007-10-17 00:00:01 +0000301
302
def run(self, command, timeout=3600, ignore_status=False,
        stdout_tee=None, stderr_tee=None, connect_timeout=30):
    """
    Run a command on the remote host.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.
        stdout_tee: stream handed to utils.run for stdout;
            sys.stdout when not given
        stderr_tee: stream handed to utils.run for stderr;
            also sys.stdout (not sys.stderr) when not given
        connect_timeout: ssh connection timeout in seconds

    Returns:
        a hosts.base_classes.CmdResult object

    Raises:
        AutoservRunError: the exit code of the command
            execution was not 0
        AutoservSSHTimeout: ssh connection has timed out
    """
    out_stream = stdout_tee or sys.stdout
    err_stream = stderr_tee or sys.stdout
    print("ssh: %s" % (command,))

    # the host environment is passed as NAME=value words in front of
    # the command
    env = " ".join(["=".join(pair) for pair in self.env.items()])
    full_cmd = '%s "%s %s"' % (self.ssh_command(connect_timeout),
                               env, utils.sh_escape(command))
    # the exit status is examined here rather than by utils.run
    result = utils.run(full_cmd, timeout, True, out_stream, err_stream)

    # ssh's exit status for timeout
    if result.exit_status == 255 and re.match(
            r'^ssh: connect to host .* port .*: '
            r'Connection timed out\r$', result.stderr):
        raise error.AutoservSSHTimeout("ssh timed out", result)
    if result.exit_status > 0 and not ignore_status:
        raise error.AutoservRunError("command execution error", result)
    return result
mbligh7d2bde82007-08-02 16:26:10 +0000341
342
def run_short(self, command, **kwargs):
    """
    Calls the run() command with a short default timeout.

    Args:
        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.

    Returns:
        the result of run()
    """
    result = self.run(command, timeout=60, **kwargs)
    return result
354
355
def run_grep(self, command, timeout=30, ignore_status=False,
             stdout_ok_regexp=None, stdout_err_regexp=None,
             stderr_ok_regexp=None, stderr_err_regexp=None,
             connect_timeout=30):
    """
    Run a command on the remote host and look for regexp
    in stdout or stderr to determine if the command was
    successful or not.

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.
        stdout_ok_regexp: regexp that should be in stdout
            if the command was successful.
        stdout_err_regexp: regexp that should be in stdout
            if the command failed.
        stderr_ok_regexp: regexp that should be in stderr
            if the command was successful.
        stderr_err_regexp: regexp that should be in stderr
            if the command failed.
        connect_timeout: ssh connection timeout in seconds,
            passed on to run()

    Returns:
        None if the command is considered successful, raises
        an exception otherwise.

    Raises:
        AutoservRunError:
            - If stderr_err_regexp is found in stderr,
            - If stdout_err_regexp is found in stdout,
            - If no "ok" regexp matched, ignore_status is
              false and the exit code of the command was
              greater than 0.
        A matching "ok" regexp makes the call succeed whatever
        the exit code was; an "ok" regexp that does not match
        does not raise by itself.
    """
    # We ignore the status, because we will handle it at the end.
    result = self.run(command, timeout, ignore_status=True,
                      connect_timeout=connect_timeout)

    # Look for the patterns, in order: error patterns win over
    # everything else
    for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                             (stdout_err_regexp, result.stdout)):
        if regexp and stream and re.search(regexp, stream):
            raise error.AutoservRunError(
                '%s failed, found error pattern: '
                '"%s"' % (command, regexp), result)

    for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                             (stdout_ok_regexp, result.stdout)):
        if regexp and stream and re.search(regexp, stream):
            return

    if not ignore_status and result.exit_status > 0:
        raise error.AutoservRunError("command execution error",
                                     result)
mbligh78669ff2008-01-10 16:33:07 +0000420
421
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=None,
           kernel_args=None, wait=True):
    """
    Reboot the remote host.

    Args:
        timeout: time limit in seconds passed to
            wait_for_restart()
        label: boot entry to make the default before rebooting
        kernel_args: extra kernel arguments added to the boot
            entry; the netconsole parameter, when one was
            determined, is always appended
        wait: when true, block until the host has restarted

    Raises:
        AutoservRunError: the reboot command failed
    """
    self.reboot_setup()

    # forcibly include the "netconsole" kernel arg
    netconsole_arg = self.__netconsole_param
    if netconsole_arg:
        if kernel_args is None:
            kernel_args = netconsole_arg
        else:
            kernel_args += " " + netconsole_arg
        # unload the (possibly loaded) module to avoid shutdown issues
        self.__unload_netconsole_module()

    if label or kernel_args:
        self.bootloader.install_boottool()
    if label:
        self.bootloader.set_default(label)
    if kernel_args:
        if not label:
            # no label requested: the arguments go to the current
            # default boot entry
            default_index = int(self.bootloader.get_default())
            label = self.bootloader.get_titles()[default_index]
        self.bootloader.add_args(label, kernel_args)

    print("Reboot: initiating reboot")
    self.__record("GOOD", None, "reboot.start")
    try:
        self.run('(sleep 5; reboot) </dev/null >/dev/null 2>&1 &')
    except error.AutoservRunError:
        self.__record("ABORT", None, "reboot.start",
                      "reboot command failed")
        raise

    if not wait:
        return
    self.wait_for_restart(timeout)
    self.__load_netconsole_module()  # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000460
mbligh7d2bde82007-08-02 16:26:10 +0000461
def __copy_files(self, sources, dest):
    """
    Copy files from one machine to another.

    This is for internal use by other methods that intend to move
    files between machines. It expects a list of source files and
    a destination (a filename if the source is a single file, a
    directory otherwise). The names must already be
    pre-processed into the appropriate rsync/scp friendly
    format (%s@%s:%s).

    The copy is attempted with rsync first; if that fails for any
    reason scp is used instead.
    """
    # wait until there are only a small number of copies running
    # before starting this one
    MAXIMUM_SIMULTANEOUS_COPIES = 4
    while True:
        copy_count = 0
        # assumes utils.system_output returns the whole listing as one
        # string; it has to be split into lines, since iterating the
        # string itself walks single characters and never matches
        procs = utils.system_output('ps -ef')
        for line in procs.splitlines():
            if 'rsync ' in line or 'scp ' in line:
                copy_count += 1
        if copy_count < MAXIMUM_SIMULTANEOUS_COPIES:
            break
        time.sleep(60)

    print('__copy_files: copying %s to %s' % (sources, dest))
    try:
        utils.run('rsync --rsh="%s" -az %s %s' % (
            self.ssh_base_command(), ' '.join(sources), dest))
    except Exception:
        # deliberate fallback: any rsync failure is retried with scp
        utils.run('scp -rpq %s "%s"' % (
            ' '.join(sources), dest))
493
494
def get_file(self, source, dest):
    """
    Copy files from the remote host to a local path.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    remote_sources = []
    for entry in source:
        # a trailing slash selects the directory content via a glob
        if entry.endswith('/'):
            template = '%s@%s:"%s*"'
        else:
            template = '%s@%s:"%s"'
        remote_sources.append(template % (self.user, self.hostname,
                                          utils.scp_remote_escape(entry)))

    local_dest = utils.sh_escape(os.path.abspath(dest))
    if os.path.isdir(dest):
        # keep the trailing slash for an existing directory
        local_dest = "%s/" % (local_dest,)

    self.__copy_files(remote_sources, local_dest)
mbligh7d2bde82007-08-02 16:26:10 +0000537
538
def send_file(self, source, dest):
    """
    Copy files from a local path to the remote host.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    After the copy, everything below dest on the remote host is
    made accessible to others: o+rx for directories and o+r for
    files.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    local_sources = []
    for entry in source:
        local_path = utils.sh_escape(os.path.abspath(entry))
        if entry.endswith('/'):
            # a trailing slash selects the directory content via a glob
            local_sources.append('"%s/"*' % (local_path,))
        else:
            local_sources.append('"%s"' % (local_path,))

    remote_dest = '%s@%s:"%s"' % (self.user, self.hostname,
                                  utils.scp_remote_escape(dest))

    self.__copy_files(local_sources, remote_dest)
    self.run('find "%s" -type d | xargs -i -r chmod o+rx "{}"' % dest)
    self.run('find "%s" -type f | xargs -i -r chmod o+r "{}"' % dest)
mbligh7d2bde82007-08-02 16:26:10 +0000580
def get_tmp_dir(self):
    """
    Return the pathname of a directory on the host suitable
    for temporary file storage.

    The directory is created with 'mktemp -d' and remembered in
    self.tmp_dirs. The directory and its content will be deleted
    automatically on the destruction of the Host object that was
    used to obtain it.
    """
    mktemp_result = self.run("mktemp -d /tmp/autoserv-XXXXXX")
    dir_name = mktemp_result.stdout.rstrip(" \n")
    self.tmp_dirs.append(dir_name)
    return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000593
594
def is_up(self):
    """
    Check if the remote host is up.

    The host counts as up when ssh_ping() completes without
    raising. KeyboardInterrupt and SystemExit are not treated as
    "host down"; they propagate to the caller.

    Returns:
        True if the remote host is up, False otherwise
    """
    try:
        self.ssh_ping()
    except Exception:
        return False
    return True
mbligh7d2bde82007-08-02 16:26:10 +0000607
mbligh7d2bde82007-08-02 16:26:10 +0000608
def wait_up(self, timeout=None):
    """
    Wait until the remote host is up or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host can be established. The host is probed with ssh_ping(),
    pausing one second between attempts. KeyboardInterrupt and
    SystemExit are not swallowed, so the wait can be interrupted.

    Args:
        timeout: time limit in seconds before returning even
            if the host is not up. A false value (None or
            0) means wait without limit.

    Returns:
        True if the host was found to be up, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except Exception:
            # not reachable yet, try again after a short pause
            pass
        else:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000636
637
def wait_down(self, timeout=None):
    """
    Wait until the remote host is down or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host fails. The host is probed with ssh_ping(), pausing one
    second between attempts. KeyboardInterrupt and SystemExit are
    not mistaken for "host down"; they propagate to the caller.

    Args:
        timeout: time limit in seconds before returning even
            if the host is still up. A false value (None or
            0) means wait without limit.

    Returns:
        True if the host was found to be down, False otherwise
    """
    if timeout:
        end_time = time.time() + timeout

    while not timeout or time.time() < end_time:
        try:
            self.ssh_ping()
        except Exception:
            return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000663
664
def ensure_up(self):
    """
    Ensure the host is up; if it is not then do not proceed.
    This prevents cascading failures of tests.

    When the host does not answer within 300 seconds a hard reset
    is attempted; an unsupported hard reset is only reported. The
    host then gets up to 30 minutes to come up.

    Raises:
        AutoservHostError: the host did not come up
    """
    print('Ensuring that %s is up before continuing' % self.hostname)
    if hasattr(self, 'hardreset') and not self.wait_up(300):
        print("Performing a hardreset on %s" % self.hostname)
        try:
            self.hardreset()
        except error.AutoservUnsupportedError:
            print("Hardreset is unsupported on %s" % self.hostname)

    # 30 minutes should be more than enough
    came_up = self.wait_up(60 * 30)
    if not came_up:
        raise error.AutoservHostError
    print('Host up, continuing')
mbligh7d2bde82007-08-02 16:26:10 +0000681
682
def get_num_cpu(self):
    """
    Get the number of CPUs in the host according to
    /proc/cpuinfo.

    Counts the lines of the remote /proc/cpuinfo that start with
    'processor'.

    Returns:
        The number of CPUs
    """
    # /dev/null is passed as the tee stream for the command's stdout;
    # the handle is closed again once the command has run
    dev_null = open('/dev/null', 'w')
    try:
        proc_cpuinfo = self.run("cat /proc/cpuinfo",
                                stdout_tee=dev_null).stdout
    finally:
        dev_null.close()

    cpus = 0
    for line in proc_cpuinfo.splitlines():
        if line.startswith('processor'):
            cpus += 1
    return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000699
700
def check_uptime(self):
    """
    Check that the host answers a ping and read its uptime.

    Returns:
        the first field of the remote /proc/uptime, as a string

    Raises:
        AutoservHostError: the host is not pingable
    """
    pingable = self.ping()
    if not pingable:
        raise error.AutoservHostError('Client is not pingable')
    uptime_fields = self.run("/bin/cat /proc/uptime", 30).stdout.strip().split()
    return uptime_fields[0]
709
710
def get_arch(self):
    """
    Get the hardware architecture of the remote machine

    The output of 'uname -m' is returned, except that every
    i<digit>86 value is reported as 'i386'.
    """
    machine = self.run('/bin/uname -m').stdout.rstrip()
    if re.match(r'i\d86$', machine):
        return 'i386'
    return machine
719
720
def get_kernel_ver(self):
    """
    Get the kernel version of the remote machine

    Returns the output of 'uname -r' without trailing whitespace.
    """
    uname_result = self.run('/bin/uname -r')
    return uname_result.stdout.rstrip()
726
727
def get_cmdline(self):
    """
    Get the kernel command line of the remote machine

    Returns the content of the remote /proc/cmdline without
    trailing whitespace.
    """
    cat_result = self.run('cat /proc/cmdline')
    return cat_result.stdout.rstrip()
733
734
def ping(self):
    """
    Ping the remote system, and return whether it's available

    Runs '/usr/bin/fping -q <hostname>' locally.

    Returns:
        True if fping exited with status 0, False otherwise
    """
    fping_cmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
    exit_status = utils.system(fping_cmd, ignore_status=1)
    if exit_status == 0:
        return True
    return False
mblighd2e46052007-11-05 18:31:00 +0000742
mblighf014ff42007-11-26 21:33:11 +0000743
def ssh_ping(self, timeout=60):
    """
    Check that a command can be run on the host over ssh.

    Runs 'true' remotely; whatever run() raises propagates to the
    caller.

    Args:
        timeout: seconds, used both as the command timeout and
            as the ssh connect timeout
    """
    self.run('true', connect_timeout=timeout, timeout=timeout)
mblighda13d542008-01-03 16:28:34 +0000746
747
def get_autodir(self):
    """
    Return the autodir value stored on this host object (the
    constructor's autodir argument, None by default).
    """
    autodir = self.autodir
    return autodir