blob: 1a79a62627f955f39478568e2f1c9ed15ef582ab [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
        SSHHost: a remote machine with ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh5f876ad2007-10-12 23:59:53 +000022import base_classes, utils, errors, bootloader
mblighdcd57a82007-07-11 23:06:47 +000023
24
25class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000026 """
27 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000028 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000029
mblighdcd57a82007-07-11 23:06:47 +000030 It is not the machine autoserv is running on. The machine must be
31 configured for password-less login, for example through public key
32 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000033
mbligh3409ee72007-10-16 23:58:33 +000034 It includes support for controlling the machine through a serial
35 console on which you can run programs. If such a serial console is
36 set up on the machine then capabilities such as hard reset and
37 boot strap monitoring are available. If the machine does not have a
38 serial console available then ordinary SSH-based commands will
39 still be available, but attempts to use extensions such as
40 console logging or hard reset will fail silently.
41
mblighdcd57a82007-07-11 23:06:47 +000042 Implementation details:
43 This is a leaf class in an abstract class hierarchy, it must
44 implement the unimplemented methods in parent classes.
45 """
mbligh7d2bde82007-08-02 16:26:10 +000046
mbligh0faf91f2007-10-18 03:10:48 +000047 SSH_BASE_COMMAND = 'ssh -a'
48
mblighde384372007-10-17 04:25:37 +000049 def __init__(self, hostname, user="root", port=22, initialize=True,
mblighd2fc50f2007-10-23 22:38:00 +000050 conmux_log="console.log", conmux_server=None, conmux_attach=None,
51 netconsole_log="netconsole.log", netconsole_port=6666):
mbligh7d2bde82007-08-02 16:26:10 +000052 """
53 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000054
55 Args:
56 hostname: network hostname or address of remote machine
57 user: user to log in as on the remote machine
58 port: port the ssh daemon is listening on on the remote
59 machine
mbligh9708f732007-10-18 03:18:54 +000060 """
mblighdcd57a82007-07-11 23:06:47 +000061 self.hostname= hostname
62 self.user= user
63 self.port= port
64 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000065 self.initialize = initialize
mbligh91334902007-09-28 01:47:59 +000066
mbligh9708f732007-10-18 03:18:54 +000067 super(SSHHost, self).__init__()
68
mbligh3409ee72007-10-16 23:58:33 +000069 self.conmux_server = conmux_server
70 self.conmux_attach = self.__find_console_attach(conmux_attach)
71 self.logger_pid = None
mblighde384372007-10-17 04:25:37 +000072 self.__start_console_log(conmux_log)
mbligh3409ee72007-10-16 23:58:33 +000073
mbligha0452c82007-08-08 20:24:57 +000074 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000075
mblighde384372007-10-17 04:25:37 +000076 self.__init_netconsole_params(netconsole_port)
77 self.netlogger_pid = None
78 self.__start_netconsole_log(netconsole_log, netconsole_port)
79 self.__load_netconsole_module()
80
mbligh7d2bde82007-08-02 16:26:10 +000081
mblighdcd57a82007-07-11 23:06:47 +000082 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +000083 """
84 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000085 """
86 for dir in self.tmp_dirs:
87 try:
88 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
89 except errors.AutoservRunError:
90 pass
mblighde384372007-10-17 04:25:37 +000091 # kill the console logger
mbligh7364ae42007-10-18 03:20:34 +000092 if getattr(self, 'logger_pid', None):
mbligh3409ee72007-10-16 23:58:33 +000093 try:
94 pgid = os.getpgid(self.logger_pid)
95 os.killpg(pgid, signal.SIGTERM)
96 except OSError:
97 pass
mblighde384372007-10-17 04:25:37 +000098 # kill the netconsole logger
mbligh7364ae42007-10-18 03:20:34 +000099 if getattr(self, 'netlogger_pid', None):
mblighde384372007-10-17 04:25:37 +0000100 try:
101 os.kill(self.netlogger_pid, signal.SIGTERM)
102 except OSError:
103 pass
104
105
106 def __init_netconsole_params(self, port):
107 """
108 Connect to the remote machine and determine the values to use for the
109 required netconsole parameters.
110 """
111 self.__netconsole_param = ""
112 # PROBLEM: on machines with multiple IPs this may not make any sense
113 # It also doesn't work with IPv6
114 remote_ip = socket.gethostbyname(self.hostname)
115 local_ip = socket.gethostbyname(socket.gethostname())
116 # Get the gateway of the remote machine
117 try:
118 traceroute = self.run('traceroute -n %s' % local_ip)
119 except errors.AutoservRunError:
120 return
121 first_node = traceroute.stdout.split("\n")[0]
122 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
123 if match:
124 router_ip = match.group(1)
125 else:
126 return
127 # Look up the MAC address of the gateway
128 try:
129 self.run('ping -c 1 %s' % router_ip)
130 arp = self.run('arp -n -a %s' % router_ip)
131 except errors.AutoservRunError:
132 return
133 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
134 if match:
135 gateway_mac = match.group(1)
136 else:
137 return
138 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
139 port,
140 local_ip,
141 gateway_mac)
142
143
144 def __start_netconsole_log(self, logfilename, port):
145 """
146 Log the output of netconsole to a specified file
147 """
148 if logfilename == None:
149 return
150 cmd = ['nc', '-u', '-l', '-p', str(port)]
mblighd2fc50f2007-10-23 22:38:00 +0000151 logger = subprocess.Popen(cmd, stdout=open(logfilename, "a", 0))
mblighde384372007-10-17 04:25:37 +0000152 self.netlogger_pid = logger.pid
153
154
155 def __load_netconsole_module(self):
156 """
157 Make a best effort to load the netconsole module.
158
159 Note that loading the module can fail even when the remote machine is
160 working correctly if netconsole is already compiled into the kernel
161 and started.
162 """
163 try:
164 self.run('modprobe netconsole %s' % self.__netconsole_param)
165 except errors.AutoservRunError:
166 # if it fails there isn't much we can do, just keep going
167 pass
168
169
170 def __unload_netconsole_module(self):
171 try:
172 self.run('modprobe -r netconsole')
173 except errors.AutoservRunError:
174 pass
mbligh3409ee72007-10-16 23:58:33 +0000175
176
177 def __wait_for_restart(self, timeout):
178 self.wait_down(60) # Make sure he's dead, Jim
179 self.wait_up(timeout)
180 time.sleep(2) # this is needed for complete reliability
181 self.wait_up(timeout)
182 print "Reboot complete"
183
184
185 def hardreset(self, timeout=600, wait=True):
186 """
187 Reach out and slap the box in the power switch
188 """
189 result = self.__console_run(r"'~$hardreset'")
190 if wait:
191 self.__wait_for_restart(timeout)
192 return result
193
194
195 def __start_console_log(self, logfilename):
196 """
197 Log the output of the console session to a specified file
198 """
199 if logfilename == None:
200 return
201 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
202 return
203 if self.conmux_server:
204 to = '%s/%s' % (self.conmux_server, self.hostname)
205 else:
206 to = self.hostname
mblighd2fc50f2007-10-23 22:38:00 +0000207 cmd = [self.conmux_attach, to, 'cat -']
mbligh3409ee72007-10-16 23:58:33 +0000208 logger = subprocess.Popen(cmd,
mblighd2fc50f2007-10-23 22:38:00 +0000209 stdout=open(logfilename, 'a', 0),
mbligh3409ee72007-10-16 23:58:33 +0000210 stderr=open('/dev/null', 'w'),
211 preexec_fn=lambda: os.setpgid(0, 0))
212 self.logger_pid = logger.pid
213
214
215 def __find_console_attach(self, conmux_attach):
216 if conmux_attach:
217 return conmux_attach
218 try:
219 res = utils.run('which conmux-attach')
220 if res.exit_status == 0:
221 return res.stdout.strip()
222 except errors.AutoservRunError, e:
223 pass
mbligh9708f732007-10-18 03:18:54 +0000224 autotest_conmux = os.path.join(self.serverdir, '..',
mbligh3409ee72007-10-16 23:58:33 +0000225 'conmux', 'conmux-attach')
mbligh9708f732007-10-18 03:18:54 +0000226 autotest_conmux_alt = os.path.join(self.serverdir,
mbligh3409ee72007-10-16 23:58:33 +0000227 '..', 'autotest',
228 'conmux', 'conmux-attach')
229 locations = [autotest_conmux,
230 autotest_conmux_alt,
231 '/usr/local/conmux/bin/conmux-attach',
232 '/usr/bin/conmux-attach']
233 for l in locations:
234 if os.path.exists(l):
235 return l
236
237 print "WARNING: conmux-attach not found on autoserv server"
238 return None
239
240
241 def __console_run(self, cmd):
242 """
243 Send a command to the conmux session
244 """
245 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
246 return False
247 if self.conmux_server:
248 to = '%s/%s' % (self.conmux_server, self.hostname)
249 else:
250 to = self.hostname
251 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
252 to,
253 cmd)
254 result = os.system(cmd)
255 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000256
257
mblighe6647d12007-10-17 00:00:01 +0000258 def ssh_command(self):
259 """Construct an ssh command with proper args for this host."""
mbligh0faf91f2007-10-18 03:10:48 +0000260 return r'%s -l %s -p %d %s' % (self.SSH_BASE_COMMAND,
261 self.user,
262 self.port,
263 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000264
265
mblighcf965b02007-07-25 16:49:45 +0000266 def run(self, command, timeout=None, ignore_status=False):
mbligh7d2bde82007-08-02 16:26:10 +0000267 """
268 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000269
270 Args:
271 command: the command line string
272 timeout: time limit in seconds before attempting to
273 kill the running process. The run() function
274 will take a few seconds longer than 'timeout'
275 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000276 ignore_status: do not raise an exception, no matter
277 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000278
279 Returns:
280 a hosts.base_classes.CmdResult object
281
282 Raises:
283 AutoservRunError: the exit code of the command
284 execution was not 0
285 """
286 #~ print "running %s" % (command,)
mblighe6647d12007-10-17 00:00:01 +0000287 result= utils.run(r'%s "%s"' % (self.ssh_command(),
288 utils.sh_escape(command)),
289 timeout, ignore_status)
mblighdcd57a82007-07-11 23:06:47 +0000290 return result
mbligh7d2bde82007-08-02 16:26:10 +0000291
292
    def reboot(self, timeout=600, label=None, kernel_args=None, wait=True):
        """
        Reboot the remote host.

        Args:
            timeout: time limit in seconds handed to the wait for
                the host to come back up (only used when wait is
                true)
            label: if given, passed to bootloader.set_default()
                before rebooting
            kernel_args: if given, passed to bootloader.add_args()
                for the boot entry; the netconsole parameter, when
                one was determined, is appended to it
            wait: when true, block until the host has restarted and
                then try to load the netconsole module again
        """
        # forcibly include the "netconsole" kernel arg
        if self.__netconsole_param:
            if kernel_args is None:
                kernel_args = self.__netconsole_param
            else:
                kernel_args += " " + self.__netconsole_param
            # unload the (possibly loaded) module to avoid shutdown issues
            self.__unload_netconsole_module()
        # boottool is only needed when the boot configuration changes
        if label or kernel_args:
            self.bootloader.install_boottool()
        if label:
            self.bootloader.set_default(label)
        if kernel_args:
            if not label:
                # no label given: use the title at the bootloader's
                # default index
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.add_args(label, kernel_args)
        print "Reboot: initiating reboot"
        self.run('reboot')
        if wait:
            self.__wait_for_restart(timeout)
            self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000322
mbligh7d2bde82007-08-02 16:26:10 +0000323
mblighdcd57a82007-07-11 23:06:47 +0000324 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000325 """
326 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000327
328 Directories will be copied recursively.
329 If a source component is a directory with a trailing slash,
330 the content of the directory will be copied, otherwise, the
331 directory itself and its content will be copied. This
332 behavior is similar to that of the program 'rsync'.
333
334 Args:
335 source: either
336 1) a single file or directory, as a string
337 2) a list of one or more (possibly mixed)
338 files or directories
339 dest: a file or a directory (if source contains a
340 directory or more than one element, you must
341 supply a directory dest)
342
343 Raises:
344 AutoservRunError: the scp command failed
345 """
346 if isinstance(source, types.StringTypes):
347 source= [source]
348
349 processed_source= []
350 for entry in source:
351 if entry.endswith('/'):
352 format_string= '%s@%s:"%s*"'
353 else:
354 format_string= '%s@%s:"%s"'
355 entry= format_string % (self.user, self.hostname,
356 utils.scp_remote_escape(entry))
357 processed_source.append(entry)
358
359 processed_dest= os.path.abspath(dest)
360 if os.path.isdir(dest):
361 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
362 else:
363 processed_dest= utils.sh_escape(processed_dest)
364
365 utils.run('scp -rpq %s "%s"' % (
366 " ".join(processed_source),
367 processed_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000368
369
mblighdcd57a82007-07-11 23:06:47 +0000370 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000371 """
372 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000373
374 Directories will be copied recursively.
375 If a source component is a directory with a trailing slash,
376 the content of the directory will be copied, otherwise, the
377 directory itself and its content will be copied. This
378 behavior is similar to that of the program 'rsync'.
379
380 Args:
381 source: either
382 1) a single file or directory, as a string
383 2) a list of one or more (possibly mixed)
384 files or directories
385 dest: a file or a directory (if source contains a
386 directory or more than one element, you must
387 supply a directory dest)
388
389 Raises:
390 AutoservRunError: the scp command failed
391 """
392 if isinstance(source, types.StringTypes):
393 source= [source]
394
395 processed_source= []
396 for entry in source:
397 if entry.endswith('/'):
398 format_string= '"%s/"*'
399 else:
400 format_string= '"%s"'
401 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
402 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000403
mblighe6647d12007-10-17 00:00:01 +0000404 result = utils.run(r'%s rsync -h' % self.ssh_command(),
405 ignore_status=True)
mblighd5669092007-08-27 19:01:05 +0000406
mbligh0faf91f2007-10-18 03:10:48 +0000407 remote_dest = '%s@%s:"%s"' % (
408 self.user, self.hostname,
409 utils.scp_remote_escape(dest))
mblighd5669092007-08-27 19:01:05 +0000410 if result.exit_status == 0:
mbligh0faf91f2007-10-18 03:10:48 +0000411 utils.run('rsync --rsh="%s" -az %s %s' % (
412 self.SSH_BASE_COMMAND, " ".join(processed_source),
413 remote_dest))
mblighd5669092007-08-27 19:01:05 +0000414 else:
mbligh0faf91f2007-10-18 03:10:48 +0000415 utils.run('scp -rpq %s %s' % (
416 " ".join(processed_source),
417 remote_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000418
mblighdcd57a82007-07-11 23:06:47 +0000419 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000420 """
421 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000422 for temporary file storage.
423
424 The directory and its content will be deleted automatically
425 on the destruction of the Host object that was used to obtain
426 it.
427 """
mbligha25b29e2007-08-26 13:58:04 +0000428 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000429 self.tmp_dirs.append(dir_name)
430 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000431
432
mblighdcd57a82007-07-11 23:06:47 +0000433 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000434 """
435 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000436
437 Returns:
438 True if the remote host is up, False otherwise
439 """
440 try:
441 result= self.run("true", timeout=10)
442 except errors.AutoservRunError:
443 return False
444 else:
445 if result.exit_status == 0:
446 return True
447 else:
mbligh7d2bde82007-08-02 16:26:10 +0000448
mblighdcd57a82007-07-11 23:06:47 +0000449 return False
mbligh7d2bde82007-08-02 16:26:10 +0000450
mblighdcd57a82007-07-11 23:06:47 +0000451 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000452 """
453 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000454
455 In fact, it will wait until an ssh connection to the remote
456 host can be established.
457
458 Args:
459 timeout: time limit in seconds before returning even
460 if the host is not up.
461
462 Returns:
463 True if the host was found to be up, False otherwise
464 """
465 if timeout:
466 end_time= time.time() + timeout
467
468 while not timeout or time.time() < end_time:
469 try:
mblighe9cf9d42007-08-31 08:56:00 +0000470 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000471 result= self.run("true", timeout=run_timeout)
472 except errors.AutoservRunError:
473 pass
474 else:
475 if result.exit_status == 0:
476 return True
477 time.sleep(1)
478
479 return False
mbligh7d2bde82007-08-02 16:26:10 +0000480
481
mblighdcd57a82007-07-11 23:06:47 +0000482 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000483 """
484 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000485
486 In fact, it will wait until an ssh connection to the remote
487 host fails.
488
489 Args:
490 timeout: time limit in seconds before returning even
491 if the host is not up.
492
493 Returns:
494 True if the host was found to be down, False otherwise
495 """
496 if timeout:
497 end_time= time.time() + timeout
498
499 while not timeout or time.time() < end_time:
500 try:
mbligh7e1e9642007-07-31 18:00:45 +0000501 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000502 result= self.run("true", timeout=run_timeout)
503 except errors.AutoservRunError:
504 return True
505 else:
506 if result.aborted:
507 return True
508 time.sleep(1)
509
510 return False
mbligh7d2bde82007-08-02 16:26:10 +0000511
512
mblighdbe4a382007-07-26 19:41:28 +0000513 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000514 """
515 Ensure the host is up if it is not then do not proceed;
516 this prevents cacading failures of tests
517 """
mbligha0452c82007-08-08 20:24:57 +0000518 print 'Ensuring that %s is up before continuing' % self.hostname
519 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000520 print "Performing a hardreset on %s" % self.hostname
521 self.hardreset()
mbligha9563b92007-10-25 14:45:56 +0000522 if not self.wait_up(60 * 30):
523 # 30 minutes should be more than enough
524 raise errors.AutoservHostError
mbligha0452c82007-08-08 20:24:57 +0000525 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000526
527
mblighdcd57a82007-07-11 23:06:47 +0000528 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000529 """
530 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000531 /proc/cpuinfo.
532
533 Returns:
534 The number of CPUs
535 """
536
mbligh5f876ad2007-10-12 23:59:53 +0000537 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000538 cpus = 0
539 for line in proc_cpuinfo.splitlines():
540 if line.startswith('processor'):
541 cpus += 1
542 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000543
544
545 def check_uptime(self):
546 """
547 Check that uptime is available and monotonically increasing.
548 """
549 if not self.ping():
550 raise "Client is not pingable"
551 result = self.run("/bin/cat /proc/uptime", 30)
552 return result.stdout.strip().split()[0]
553
554
555 def get_arch(self):
556 """
557 Get the hardware architecture of the remote machine
558 """
559 arch = self.run('/bin/uname -m').stdout.rstrip()
560 if re.match(r'i\d86$', arch):
561 arch = 'i386'
562 return arch
563
564
565 def get_kernel_ver(self):
566 """
567 Get the kernel version of the remote machine
568 """
569 return self.run('/bin/uname -r').stdout.rstrip()
570
571
572 def get_cmdline(self):
573 """
574 Get the kernel command line of the remote machine
575 """
576 return self.run('cat /proc/cmdline').stdout.rstrip()
577
578
579 def ping(self):
580 """
581 Ping the remote system, and return whether it's available
582 """
583 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
584 rc = utils.system(fpingcmd, ignore_status = 1)
585 return (rc == 0)