# blob: ef179f5949ea053d458e81ad3ba97cebc04e0786 [file] [log] [blame]
import os, time, types, socket, shutil, glob, logging, traceback

from autotest_lib.client.common_lib import autotemp, error, logging_manager
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import remote
from autotest_lib.client.common_lib.global_config import global_config


# Short alias for the global configuration lookup used in this module.
get_value = global_config.get_config_value
# Read once at import time from the AUTOSERV section; when False (the
# default) AbstractSSHHost.start_master_ssh() returns without doing anything.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)


def make_ssh_command(user="root", port=22, opts='', hosts_file='/dev/null',
                     connect_timeout=30, alive_interval=300):
    """
    Build the base (non-interactive) ssh command line as a single string.

    The command always disables strict host key checking and enables batch
    mode; the remaining options come from the arguments.

    @param user Remote login name, passed to ssh via -l.
    @param port Remote port number, passed to ssh via -p.
    @param opts Extra ssh options inserted verbatim right after "-a -x".
    @param hosts_file Path used for the UserKnownHostsFile option.
    @param connect_timeout Value for ConnectTimeout; must be a positive
            integer.
    @param alive_interval Value for ServerAliveInterval.

    @returns The ssh command line as a string.

    @raises AssertionError if connect_timeout is not an integer or is not
            greater than zero.
    """
    # `long` is a builtin only on Python 2; fall back to plain `int` where it
    # does not exist so the check itself never raises NameError.
    try:
        integer_types = (int, long)
    except NameError:
        integer_types = (int,)
    assert isinstance(connect_timeout, integer_types)
    assert connect_timeout > 0 # can't disable the timeout

    base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
                    "-o UserKnownHostsFile=%s -o BatchMode=yes "
                    "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
                    "-l %s -p %d")
    return base_command % (opts, hosts_file, connect_timeout,
                           alive_interval, user, port)


# Import the site-specific Host class. The arguments name the module
# "autotest_lib.server.hosts.site_host" and the class "SiteHost", with
# remote.RemoteHost passed as the last argument.
# NOTE(review): presumably RemoteHost is the fallback when no site module
# exists -- confirm against utils.import_site_class.
SiteHost = utils.import_site_class(
    __file__, "autotest_lib.server.hosts.site_host", "SiteHost",
    remote.RemoteHost)


31class AbstractSSHHost(SiteHost):
mblighbc9402b2009-12-29 01:15:34 +000032 """
33 This class represents a generic implementation of most of the
jadmanskica7da372008-10-21 16:26:52 +000034 framework necessary for controlling a host via ssh. It implements
35 almost all of the abstract Host methods, except for the core
mblighbc9402b2009-12-29 01:15:34 +000036 Host.run method.
37 """
jadmanskica7da372008-10-21 16:26:52 +000038
jadmanskif6562912008-10-21 17:59:01 +000039 def _initialize(self, hostname, user="root", port=22, password="",
40 *args, **dargs):
41 super(AbstractSSHHost, self)._initialize(hostname=hostname,
42 *args, **dargs)
mbligh6369cf22008-10-24 17:21:57 +000043 self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
jadmanskica7da372008-10-21 16:26:52 +000044 self.user = user
45 self.port = port
46 self.password = password
showard6eafb492010-01-15 20:29:06 +000047 self._use_rsync = None
lmraf676f32010-02-04 03:36:26 +000048 self.known_hosts_file = os.tmpfile()
49 known_hosts_fd = self.known_hosts_file.fileno()
50 self.known_hosts_fd = '/dev/fd/%s' % known_hosts_fd
jadmanskica7da372008-10-21 16:26:52 +000051
mblighefccc1b2010-01-11 19:08:42 +000052 """
53 Master SSH connection background job, socket temp directory and socket
54 control path option. If master-SSH is enabled, these fields will be
55 initialized by start_master_ssh when a new SSH connection is initiated.
56 """
57 self.master_ssh_job = None
58 self.master_ssh_tempdir = None
59 self.master_ssh_option = ''
60
showard6eafb492010-01-15 20:29:06 +000061
62 def use_rsync(self):
63 if self._use_rsync is not None:
64 return self._use_rsync
65
mblighc9892c02010-01-06 19:02:16 +000066 # Check if rsync is available on the remote host. If it's not,
67 # don't try to use it for any future file transfers.
showard6eafb492010-01-15 20:29:06 +000068 self._use_rsync = self._check_rsync()
69 if not self._use_rsync:
mblighc9892c02010-01-06 19:02:16 +000070 logging.warn("rsync not available on remote host %s -- disabled",
71 self.hostname)
72
73
74 def _check_rsync(self):
75 """
76 Check if rsync is available on the remote host.
77 """
78 try:
79 self.run("rsync --version", stdout_tee=None, stderr_tee=None)
80 except error.AutoservRunError:
81 return False
82 return True
83
jadmanskica7da372008-10-21 16:26:52 +000084
showard56176ec2009-10-28 19:52:30 +000085 def _encode_remote_paths(self, paths, escape=True):
mblighbc9402b2009-12-29 01:15:34 +000086 """
87 Given a list of file paths, encodes it as a single remote path, in
88 the style used by rsync and scp.
89 """
showard56176ec2009-10-28 19:52:30 +000090 if escape:
91 paths = [utils.scp_remote_escape(path) for path in paths]
92 return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths))
jadmanskica7da372008-10-21 16:26:52 +000093
jadmanskica7da372008-10-21 16:26:52 +000094
mbligh45561782009-05-11 21:14:34 +000095 def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks):
mblighbc9402b2009-12-29 01:15:34 +000096 """
97 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +000098 appropriate rsync command for copying them. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +000099 pre-encoded.
100 """
lmraf676f32010-02-04 03:36:26 +0000101 ssh_cmd = make_ssh_command(user=self.user, port=self.port,
102 opts=self.master_ssh_option,
103 hosts_file=self.known_hosts_fd)
jadmanskid7b79ed2009-01-07 17:19:48 +0000104 if delete_dest:
105 delete_flag = "--delete"
106 else:
107 delete_flag = ""
mbligh45561782009-05-11 21:14:34 +0000108 if preserve_symlinks:
109 symlink_flag = ""
110 else:
111 symlink_flag = "-L"
112 command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s"
113 return command % (symlink_flag, delete_flag, ssh_cmd,
114 " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000115
116
117 def _make_scp_cmd(self, sources, dest):
mblighbc9402b2009-12-29 01:15:34 +0000118 """
119 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +0000120 appropriate scp command for encoding it. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +0000121 pre-encoded.
122 """
mblighc0649d62010-01-15 18:15:58 +0000123 command = ("scp -rq %s -o StrictHostKeyChecking=no "
lmraf676f32010-02-04 03:36:26 +0000124 "-o UserKnownHostsFile=%s -P %d %s '%s'")
125 return command % (self.master_ssh_option, self.known_hosts_fd,
mblighefccc1b2010-01-11 19:08:42 +0000126 self.port, " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000127
128
129 def _make_rsync_compatible_globs(self, path, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000130 """
131 Given an rsync-style path, returns a list of globbed paths
jadmanskid7b79ed2009-01-07 17:19:48 +0000132 that will hopefully provide equivalent behaviour for scp. Does not
133 support the full range of rsync pattern matching behaviour, only that
134 exposed in the get/send_file interface (trailing slashes).
135
136 The is_local param is flag indicating if the paths should be
mblighbc9402b2009-12-29 01:15:34 +0000137 interpreted as local or remote paths.
138 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000139
140 # non-trailing slash paths should just work
141 if len(path) == 0 or path[-1] != "/":
142 return [path]
143
144 # make a function to test if a pattern matches any files
145 if is_local:
showard56176ec2009-10-28 19:52:30 +0000146 def glob_matches_files(path, pattern):
147 return len(glob.glob(path + pattern)) > 0
jadmanskid7b79ed2009-01-07 17:19:48 +0000148 else:
showard56176ec2009-10-28 19:52:30 +0000149 def glob_matches_files(path, pattern):
150 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
151 pattern),
152 stdout_tee=None, ignore_status=True)
jadmanskid7b79ed2009-01-07 17:19:48 +0000153 return result.exit_status == 0
154
155 # take a set of globs that cover all files, and see which are needed
156 patterns = ["*", ".[!.]*"]
showard56176ec2009-10-28 19:52:30 +0000157 patterns = [p for p in patterns if glob_matches_files(path, p)]
jadmanskid7b79ed2009-01-07 17:19:48 +0000158
159 # convert them into a set of paths suitable for the commandline
jadmanskid7b79ed2009-01-07 17:19:48 +0000160 if is_local:
showard56176ec2009-10-28 19:52:30 +0000161 return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
162 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000163 else:
showard56176ec2009-10-28 19:52:30 +0000164 return [utils.scp_remote_escape(path) + pattern
165 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000166
167
168 def _make_rsync_compatible_source(self, source, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000169 """
170 Applies the same logic as _make_rsync_compatible_globs, but
jadmanskid7b79ed2009-01-07 17:19:48 +0000171 applies it to an entire list of sources, producing a new list of
mblighbc9402b2009-12-29 01:15:34 +0000172 sources, properly quoted.
173 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000174 return sum((self._make_rsync_compatible_globs(path, is_local)
175 for path in source), [])
jadmanskica7da372008-10-21 16:26:52 +0000176
177
mblighfeac0102009-04-28 18:31:12 +0000178 def _set_umask_perms(self, dest):
mblighbc9402b2009-12-29 01:15:34 +0000179 """
180 Given a destination file/dir (recursively) set the permissions on
181 all the files and directories to the max allowed by running umask.
182 """
mblighfeac0102009-04-28 18:31:12 +0000183
184 # now this looks strange but I haven't found a way in Python to _just_
185 # get the umask, apparently the only option is to try to set it
186 umask = os.umask(0)
187 os.umask(umask)
188
189 max_privs = 0777 & ~umask
190
191 def set_file_privs(filename):
192 file_stat = os.stat(filename)
193
194 file_privs = max_privs
195 # if the original file permissions do not have at least one
196 # executable bit then do not set it anywhere
197 if not file_stat.st_mode & 0111:
198 file_privs &= ~0111
199
200 os.chmod(filename, file_privs)
201
202 # try a bottom-up walk so changes on directory permissions won't cut
203 # our access to the files/directories inside it
204 for root, dirs, files in os.walk(dest, topdown=False):
205 # when setting the privileges we emulate the chmod "X" behaviour
206 # that sets to execute only if it is a directory or any of the
207 # owner/group/other already has execute right
208 for dirname in dirs:
209 os.chmod(os.path.join(root, dirname), max_privs)
210
211 for filename in files:
212 set_file_privs(os.path.join(root, filename))
213
214
215 # now set privs for the dest itself
216 if os.path.isdir(dest):
217 os.chmod(dest, max_privs)
218 else:
219 set_file_privs(dest)
220
221
mbligh45561782009-05-11 21:14:34 +0000222 def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
223 preserve_symlinks=False):
jadmanskica7da372008-10-21 16:26:52 +0000224 """
225 Copy files from the remote host to a local path.
226
227 Directories will be copied recursively.
228 If a source component is a directory with a trailing slash,
229 the content of the directory will be copied, otherwise, the
230 directory itself and its content will be copied. This
231 behavior is similar to that of the program 'rsync'.
232
233 Args:
234 source: either
235 1) a single file or directory, as a string
236 2) a list of one or more (possibly mixed)
237 files or directories
238 dest: a file or a directory (if source contains a
239 directory or more than one element, you must
240 supply a directory dest)
mbligh89e258d2008-10-24 13:58:08 +0000241 delete_dest: if this is true, the command will also clear
242 out any old files at dest that are not in the
243 source
mblighfeac0102009-04-28 18:31:12 +0000244 preserve_perm: tells get_file() to try to preserve the sources
245 permissions on files and dirs
mbligh45561782009-05-11 21:14:34 +0000246 preserve_symlinks: try to preserve symlinks instead of
247 transforming them into files/dirs on copy
jadmanskica7da372008-10-21 16:26:52 +0000248
249 Raises:
250 AutoservRunError: the scp command failed
251 """
mblighefccc1b2010-01-11 19:08:42 +0000252
253 # Start a master SSH connection if necessary.
254 self.start_master_ssh()
255
jadmanskica7da372008-10-21 16:26:52 +0000256 if isinstance(source, basestring):
257 source = [source]
jadmanskid7b79ed2009-01-07 17:19:48 +0000258 dest = os.path.abspath(dest)
jadmanskica7da372008-10-21 16:26:52 +0000259
mblighc9892c02010-01-06 19:02:16 +0000260 # If rsync is disabled or fails, try scp.
showard6eafb492010-01-15 20:29:06 +0000261 try_scp = True
262 if self.use_rsync():
mblighc9892c02010-01-06 19:02:16 +0000263 try:
264 remote_source = self._encode_remote_paths(source)
265 local_dest = utils.sh_escape(dest)
266 rsync = self._make_rsync_cmd([remote_source], local_dest,
267 delete_dest, preserve_symlinks)
268 utils.run(rsync)
showard6eafb492010-01-15 20:29:06 +0000269 try_scp = False
mblighc9892c02010-01-06 19:02:16 +0000270 except error.CmdError, e:
271 logging.warn("trying scp, rsync failed: %s" % e)
mblighc9892c02010-01-06 19:02:16 +0000272
273 if try_scp:
jadmanskid7b79ed2009-01-07 17:19:48 +0000274 # scp has no equivalent to --delete, just drop the entire dest dir
275 if delete_dest and os.path.isdir(dest):
276 shutil.rmtree(dest)
277 os.mkdir(dest)
jadmanskica7da372008-10-21 16:26:52 +0000278
jadmanskid7b79ed2009-01-07 17:19:48 +0000279 remote_source = self._make_rsync_compatible_source(source, False)
280 if remote_source:
showard56176ec2009-10-28 19:52:30 +0000281 # _make_rsync_compatible_source() already did the escaping
282 remote_source = self._encode_remote_paths(remote_source,
283 escape=False)
jadmanskid7b79ed2009-01-07 17:19:48 +0000284 local_dest = utils.sh_escape(dest)
jadmanski2583a432009-02-10 23:59:11 +0000285 scp = self._make_scp_cmd([remote_source], local_dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000286 try:
287 utils.run(scp)
288 except error.CmdError, e:
289 raise error.AutoservRunError(e.args[0], e.args[1])
jadmanskica7da372008-10-21 16:26:52 +0000290
mblighfeac0102009-04-28 18:31:12 +0000291 if not preserve_perm:
292 # we have no way to tell scp to not try to preserve the
293 # permissions so set them after copy instead.
294 # for rsync we could use "--no-p --chmod=ugo=rwX" but those
295 # options are only in very recent rsync versions
296 self._set_umask_perms(dest)
297
jadmanskica7da372008-10-21 16:26:52 +0000298
mbligh45561782009-05-11 21:14:34 +0000299 def send_file(self, source, dest, delete_dest=False,
300 preserve_symlinks=False):
jadmanskica7da372008-10-21 16:26:52 +0000301 """
302 Copy files from a local path to the remote host.
303
304 Directories will be copied recursively.
305 If a source component is a directory with a trailing slash,
306 the content of the directory will be copied, otherwise, the
307 directory itself and its content will be copied. This
308 behavior is similar to that of the program 'rsync'.
309
310 Args:
311 source: either
312 1) a single file or directory, as a string
313 2) a list of one or more (possibly mixed)
314 files or directories
315 dest: a file or a directory (if source contains a
316 directory or more than one element, you must
317 supply a directory dest)
mbligh89e258d2008-10-24 13:58:08 +0000318 delete_dest: if this is true, the command will also clear
319 out any old files at dest that are not in the
320 source
mbligh45561782009-05-11 21:14:34 +0000321 preserve_symlinks: controls if symlinks on the source will be
322 copied as such on the destination or transformed into the
323 referenced file/directory
jadmanskica7da372008-10-21 16:26:52 +0000324
325 Raises:
326 AutoservRunError: the scp command failed
327 """
mblighefccc1b2010-01-11 19:08:42 +0000328
329 # Start a master SSH connection if necessary.
330 self.start_master_ssh()
331
jadmanskica7da372008-10-21 16:26:52 +0000332 if isinstance(source, basestring):
333 source = [source]
jadmanski2583a432009-02-10 23:59:11 +0000334 remote_dest = self._encode_remote_paths([dest])
jadmanskica7da372008-10-21 16:26:52 +0000335
mblighc9892c02010-01-06 19:02:16 +0000336 # If rsync is disabled or fails, try scp.
showard6eafb492010-01-15 20:29:06 +0000337 try_scp = True
338 if self.use_rsync():
mblighc9892c02010-01-06 19:02:16 +0000339 try:
340 local_sources = [utils.sh_escape(path) for path in source]
341 rsync = self._make_rsync_cmd(local_sources, remote_dest,
342 delete_dest, preserve_symlinks)
343 utils.run(rsync)
showard6eafb492010-01-15 20:29:06 +0000344 try_scp = False
mblighc9892c02010-01-06 19:02:16 +0000345 except error.CmdError, e:
346 logging.warn("trying scp, rsync failed: %s" % e)
mblighc9892c02010-01-06 19:02:16 +0000347
348 if try_scp:
jadmanskid7b79ed2009-01-07 17:19:48 +0000349 # scp has no equivalent to --delete, just drop the entire dest dir
350 if delete_dest:
showard27160152009-07-15 14:28:42 +0000351 is_dir = self.run("ls -d %s/" % dest,
jadmanskid7b79ed2009-01-07 17:19:48 +0000352 ignore_status=True).exit_status == 0
353 if is_dir:
354 cmd = "rm -rf %s && mkdir %s"
mbligh5a0ca532009-08-03 16:44:34 +0000355 cmd %= (dest, dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000356 self.run(cmd)
jadmanskica7da372008-10-21 16:26:52 +0000357
jadmanski2583a432009-02-10 23:59:11 +0000358 local_sources = self._make_rsync_compatible_source(source, True)
359 if local_sources:
360 scp = self._make_scp_cmd(local_sources, remote_dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000361 try:
362 utils.run(scp)
363 except error.CmdError, e:
364 raise error.AutoservRunError(e.args[0], e.args[1])
365
jadmanskica7da372008-10-21 16:26:52 +0000366
367 def ssh_ping(self, timeout=60):
368 try:
369 self.run("true", timeout=timeout, connect_timeout=timeout)
370 except error.AutoservSSHTimeout:
mblighd0e94982009-07-11 00:15:18 +0000371 msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
jadmanskica7da372008-10-21 16:26:52 +0000372 raise error.AutoservSSHTimeout(msg)
mbligh9d738d62009-03-09 21:17:10 +0000373 except error.AutoservSshPermissionDeniedError:
374 #let AutoservSshPermissionDeniedError be visible to the callers
375 raise
jadmanskica7da372008-10-21 16:26:52 +0000376 except error.AutoservRunError, e:
mblighc971c5f2009-06-08 16:48:54 +0000377 # convert the generic AutoservRunError into something more
378 # specific for this context
379 raise error.AutoservSshPingHostError(e.description + '\n' +
380 repr(e.result_obj))
jadmanskica7da372008-10-21 16:26:52 +0000381
382
383 def is_up(self):
384 """
385 Check if the remote host is up.
386
jadmanskic0354912010-01-12 15:57:29 +0000387 @returns True if the remote host is up, False otherwise
jadmanskica7da372008-10-21 16:26:52 +0000388 """
389 try:
390 self.ssh_ping()
391 except error.AutoservError:
392 return False
393 else:
394 return True
395
396
397 def wait_up(self, timeout=None):
398 """
399 Wait until the remote host is up or the timeout expires.
400
401 In fact, it will wait until an ssh connection to the remote
402 host can be established, and getty is running.
403
jadmanskic0354912010-01-12 15:57:29 +0000404 @param timeout time limit in seconds before returning even
405 if the host is not up.
jadmanskica7da372008-10-21 16:26:52 +0000406
jadmanskic0354912010-01-12 15:57:29 +0000407 @returns True if the host was found to be up, False otherwise
jadmanskica7da372008-10-21 16:26:52 +0000408 """
409 if timeout:
410 end_time = time.time() + timeout
411
412 while not timeout or time.time() < end_time:
413 if self.is_up():
414 try:
415 if self.are_wait_up_processes_up():
jadmanski7ebac3d2010-06-17 16:06:31 +0000416 logging.debug('Host %s is now up', self.hostname)
jadmanskica7da372008-10-21 16:26:52 +0000417 return True
418 except error.AutoservError:
419 pass
420 time.sleep(1)
421
jadmanski7ebac3d2010-06-17 16:06:31 +0000422 logging.debug('Host %s is still down after waiting %d seconds',
423 self.hostname, int(timeout + time.time() - end_time))
jadmanskica7da372008-10-21 16:26:52 +0000424 return False
425
426
jadmanskic0354912010-01-12 15:57:29 +0000427 def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
jadmanskica7da372008-10-21 16:26:52 +0000428 """
429 Wait until the remote host is down or the timeout expires.
430
jadmanskic0354912010-01-12 15:57:29 +0000431 If old_boot_id is provided, this will wait until either the machine
432 is unpingable or self.get_boot_id() returns a value different from
433 old_boot_id. If the boot_id value has changed then the function
434 returns true under the assumption that the machine has shut down
435 and has now already come back up.
jadmanskica7da372008-10-21 16:26:52 +0000436
jadmanskic0354912010-01-12 15:57:29 +0000437 If old_boot_id is None then until the machine becomes unreachable the
438 method assumes the machine has not yet shut down.
jadmanskica7da372008-10-21 16:26:52 +0000439
jadmanskic0354912010-01-12 15:57:29 +0000440 @param timeout Time limit in seconds before returning even
441 if the host is still up.
442 @param warning_timer Time limit in seconds that will generate
443 a warning if the host is not down yet.
444 @param old_boot_id A string containing the result of self.get_boot_id()
445 prior to the host being told to shut down. Can be None if this is
446 not available.
447
448 @returns True if the host was found to be down, False otherwise
jadmanskica7da372008-10-21 16:26:52 +0000449 """
mblighe5e3cf22010-05-27 23:33:14 +0000450 #TODO: there is currently no way to distinguish between knowing
451 #TODO: boot_id was unsupported and not knowing the boot_id.
mbligh2ed998f2009-04-08 21:03:47 +0000452 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000453 if timeout:
mbligh2ed998f2009-04-08 21:03:47 +0000454 end_time = current_time + timeout
jadmanskica7da372008-10-21 16:26:52 +0000455
mbligh2ed998f2009-04-08 21:03:47 +0000456 if warning_timer:
457 warn_time = current_time + warning_timer
458
jadmanskic0354912010-01-12 15:57:29 +0000459 if old_boot_id is not None:
460 logging.debug('Host %s pre-shutdown boot_id is %s',
461 self.hostname, old_boot_id)
462
mbligh2ed998f2009-04-08 21:03:47 +0000463 while not timeout or current_time < end_time:
jadmanskic0354912010-01-12 15:57:29 +0000464 try:
465 new_boot_id = self.get_boot_id()
mblighdbc7e4a2010-01-15 20:34:20 +0000466 except error.AutoservError:
jadmanskic0354912010-01-12 15:57:29 +0000467 logging.debug('Host %s is now unreachable over ssh, is down',
468 self.hostname)
jadmanskica7da372008-10-21 16:26:52 +0000469 return True
jadmanskic0354912010-01-12 15:57:29 +0000470 else:
471 # if the machine is up but the boot_id value has changed from
472 # old boot id, then we can assume the machine has gone down
473 # and then already come back up
474 if old_boot_id is not None and old_boot_id != new_boot_id:
475 logging.debug('Host %s now has boot_id %s and so must '
476 'have rebooted', self.hostname, new_boot_id)
477 return True
mbligh2ed998f2009-04-08 21:03:47 +0000478
479 if warning_timer and current_time > warn_time:
480 self.record("WARN", None, "shutdown",
481 "Shutdown took longer than %ds" % warning_timer)
482 # Print the warning only once.
483 warning_timer = None
mbligha4464402009-04-17 20:13:41 +0000484 # If a machine is stuck switching runlevels
485 # This may cause the machine to reboot.
486 self.run('kill -HUP 1', ignore_status=True)
mbligh2ed998f2009-04-08 21:03:47 +0000487
jadmanskica7da372008-10-21 16:26:52 +0000488 time.sleep(1)
mbligh2ed998f2009-04-08 21:03:47 +0000489 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000490
491 return False
jadmanskif6562912008-10-21 17:59:01 +0000492
mbligha0a27592009-01-24 01:41:36 +0000493
jadmanskif6562912008-10-21 17:59:01 +0000494 # tunable constants for the verify & repair code
mblighb86bfa12010-02-12 20:22:21 +0000495 AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
496 "gb_diskspace_required",
497 type=int,
498 default=20)
mbligha0a27592009-01-24 01:41:36 +0000499
jadmanskif6562912008-10-21 17:59:01 +0000500
showardca572982009-09-18 21:20:01 +0000501 def verify_connectivity(self):
502 super(AbstractSSHHost, self).verify_connectivity()
jadmanskif6562912008-10-21 17:59:01 +0000503
showardb18134f2009-03-20 20:52:18 +0000504 logging.info('Pinging host ' + self.hostname)
jadmanskif6562912008-10-21 17:59:01 +0000505 self.ssh_ping()
mbligh2ba7ab02009-08-24 22:09:26 +0000506 logging.info("Host (ssh) %s is alive", self.hostname)
jadmanskif6562912008-10-21 17:59:01 +0000507
jadmanski80deb752009-01-21 17:14:16 +0000508 if self.is_shutting_down():
mblighc971c5f2009-06-08 16:48:54 +0000509 raise error.AutoservHostIsShuttingDownError("Host is shutting down")
jadmanski80deb752009-01-21 17:14:16 +0000510
mblighb49b5232009-02-12 21:54:49 +0000511
showardca572982009-09-18 21:20:01 +0000512 def verify_software(self):
513 super(AbstractSSHHost, self).verify_software()
jadmanskif6562912008-10-21 17:59:01 +0000514 try:
showardad812bf2009-10-20 23:49:56 +0000515 self.check_diskspace(autotest.Autotest.get_install_dir(self),
516 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
jadmanskif6562912008-10-21 17:59:01 +0000517 except error.AutoservHostError:
518 raise # only want to raise if it's a space issue
showardad812bf2009-10-20 23:49:56 +0000519 except autotest.AutodirNotFoundError:
showardca572982009-09-18 21:20:01 +0000520 # autotest dir may not exist, etc. ignore
521 logging.debug('autodir space check exception, this is probably '
522 'safe to ignore\n' + traceback.format_exc())
mblighefccc1b2010-01-11 19:08:42 +0000523
524
525 def close(self):
526 super(AbstractSSHHost, self).close()
527 self._cleanup_master_ssh()
lmraf676f32010-02-04 03:36:26 +0000528 self.known_hosts_file.close()
mblighefccc1b2010-01-11 19:08:42 +0000529
530
531 def _cleanup_master_ssh(self):
532 """
533 Release all resources (process, temporary directory) used by an active
534 master SSH connection.
535 """
536 # If a master SSH connection is running, kill it.
537 if self.master_ssh_job is not None:
538 utils.nuke_subprocess(self.master_ssh_job.sp)
539 self.master_ssh_job = None
540
541 # Remove the temporary directory for the master SSH socket.
542 if self.master_ssh_tempdir is not None:
543 self.master_ssh_tempdir.clean()
544 self.master_ssh_tempdir = None
545 self.master_ssh_option = ''
546
547
548 def start_master_ssh(self):
549 """
550 Called whenever a slave SSH connection needs to be initiated (e.g., by
551 run, rsync, scp). If master SSH support is enabled and a master SSH
552 connection is not active already, start a new one in the background.
553 Also, cleanup any zombie master SSH connections (e.g., dead due to
554 reboot).
555 """
556 if not enable_master_ssh:
557 return
558
559 # If a previously started master SSH connection is not running
560 # anymore, it needs to be cleaned up and then restarted.
561 if self.master_ssh_job is not None:
562 if self.master_ssh_job.sp.poll() is not None:
563 logging.info("Master ssh connection to %s is down.",
564 self.hostname)
565 self._cleanup_master_ssh()
566
567 # Start a new master SSH connection.
568 if self.master_ssh_job is None:
569 # Create a shared socket in a temp location.
570 self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master')
571 self.master_ssh_option = ("-o ControlPath=%s/socket" %
572 self.master_ssh_tempdir.name)
573
574 # Start the master SSH connection in the background.
mbligh5644c122010-01-29 17:43:26 +0000575 master_cmd = self.ssh_command(options="-N -o ControlMaster=yes")
mblighefccc1b2010-01-11 19:08:42 +0000576 logging.info("Starting master ssh connection '%s'" % master_cmd)
577 self.master_ssh_job = utils.BgJob(master_cmd)
mbligh0a883702010-04-21 01:58:34 +0000578
579
580 def clear_known_hosts(self):
581 """Clears out the temporary ssh known_hosts file.
582
583 This is useful if the test SSHes to the machine, then reinstalls it,
584 then SSHes to it again. It can be called after the reinstall to
585 reduce the spam in the logs.
586 """
587 logging.info("Clearing known hosts for host '%s', file '%s'.",
588 self.hostname, self.known_hosts_fd)
589 # Clear out the file by opening it for writing and then closing.
590 fh = open(self.known_hosts_fd, "w")
591 fh.close()