blob: ee93fac37d9a2819a5b5fb269803e3c3a8d67dbf [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh5f876ad2007-10-12 23:59:53 +000022import base_classes, utils, errors, bootloader
mblighdcd57a82007-07-11 23:06:47 +000023
24
25class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000026 """
27 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000028 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000029
mblighdcd57a82007-07-11 23:06:47 +000030 It is not the machine autoserv is running on. The machine must be
31 configured for password-less login, for example through public key
32 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000033
mbligh3409ee72007-10-16 23:58:33 +000034 It includes support for controlling the machine through a serial
35 console on which you can run programs. If such a serial console is
36 set up on the machine then capabilities such as hard reset and
37 boot strap monitoring are available. If the machine does not have a
38 serial console available then ordinary SSH-based commands will
39 still be available, but attempts to use extensions such as
40 console logging or hard reset will fail silently.
41
mblighdcd57a82007-07-11 23:06:47 +000042 Implementation details:
43 This is a leaf class in an abstract class hierarchy, it must
44 implement the unimplemented methods in parent classes.
45 """
mbligh7d2bde82007-08-02 16:26:10 +000046
mbligh0faf91f2007-10-18 03:10:48 +000047 SSH_BASE_COMMAND = 'ssh -a'
48
def __init__(self, hostname, user="root", port=22, initialize=True,
             conmux_log=None, conmux_server=None, conmux_attach=None,
             netconsole_log=None, netconsole_port=6666):
    """
    Construct a SSHHost object

    Args:
        hostname: network hostname or address of remote machine
        user: user to log in as on the remote machine
        port: port the ssh daemon is listening on on the remote
            machine
        initialize: stored as self.initialize; not otherwise
            used by this constructor
        conmux_log: file the conmux console session is logged
            to, or None to disable console logging
        conmux_server: optional conmux server; when set, the
            console is addressed as "<conmux_server>/<hostname>"
        conmux_attach: path of the conmux-attach program; when
            not given, well-known locations are searched
        netconsole_log: file the netconsole output is logged
            to, or None to disable netconsole logging
        netconsole_port: port used for the local netconsole
            listener and for the netconsole kernel parameter
    """
    self.hostname = hostname
    self.user = user
    self.port = port
    self.tmp_dirs = []
    self.initialize = initialize
    # Set the logger pids before any call that can raise: __del__
    # reads them even when construction is aborted half way.
    self.logger_pid = None
    self.netlogger_pid = None

    super(SSHHost, self).__init__()

    self.conmux_server = conmux_server
    self.conmux_attach = self.__find_console_attach(conmux_attach)
    self.__start_console_log(conmux_log)

    self.bootloader = bootloader.Bootloader(self)

    self.__init_netconsole_params(netconsole_port)
    self.__start_netconsole_log(netconsole_log, netconsole_port)
    self.__load_netconsole_module()
80
mbligh7d2bde82007-08-02 16:26:10 +000081
def __del__(self):
    """
    Destroy a SSHHost object

    Removes the temporary directories handed out by get_tmp_dir()
    from the remote machine and terminates the console and
    netconsole logger processes if they were started. Failures
    of the remote command or of the kill calls are ignored.
    """
    # getattr() with a default: __init__ may have raised before
    # these attributes were assigned.
    for tmp_dir in getattr(self, 'tmp_dirs', []):
        try:
            self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
        except errors.AutoservRunError:
            pass
    # kill the console logger (it runs in its own process group)
    logger_pid = getattr(self, 'logger_pid', None)
    if logger_pid:
        try:
            pgid = os.getpgid(logger_pid)
            os.killpg(pgid, signal.SIGTERM)
        except OSError:
            pass
    # kill the netconsole logger
    netlogger_pid = getattr(self, 'netlogger_pid', None)
    if netlogger_pid:
        try:
            os.kill(netlogger_pid, signal.SIGTERM)
        except OSError:
            pass
104
105
def __init_netconsole_params(self, port):
    """
    Connect to the remote machine and determine the values to use for
    the required netconsole parameters.

    The result is stored in self.__netconsole_param. It is left as
    an empty string whenever a step fails: name resolution, the
    remote traceroute/ping/arp commands, or parsing their output.

    Args:
        port: port number placed in the target part of the
            netconsole parameter
    """
    self.__netconsole_param = ""
    # PROBLEM: on machines with multiple IPs this may not make any sense
    # It also doesn't work with IPv6
    try:
        remote_ip = socket.gethostbyname(self.hostname)
        local_ip = socket.gethostbyname(socket.gethostname())
    except socket.error:
        # netconsole is best effort; an unresolvable name must not
        # abort the construction of the host object
        return
    # Get the gateway of the remote machine
    try:
        traceroute = self.run('traceroute -n %s' % local_ip)
    except errors.AutoservRunError:
        return
    first_node = traceroute.stdout.split("\n")[0]
    match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
    if not match:
        return
    router_ip = match.group(1)
    # Look up the MAC address of the gateway; the ping makes sure
    # the remote ARP table has an entry for it
    try:
        self.run('ping -c 1 %s' % router_ip)
        arp = self.run('arp -n -a %s' % router_ip)
    except errors.AutoservRunError:
        return
    # accept both upper and lower case hex digits in the MAC
    match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+',
                      arp.stdout, re.IGNORECASE)
    if not match:
        return
    gateway_mac = match.group(1)
    self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
                                                            port,
                                                            local_ip,
                                                            gateway_mac)
142
143
def __start_netconsole_log(self, logfilename, port):
    """
    Log the output of netconsole to a specified file

    Starts a local "nc -u -l -p <port>" process whose stdout is
    written to logfilename and records its pid in
    self.netlogger_pid. Does nothing when logfilename is None.

    Args:
        logfilename: file to write the netconsole output to
        port: port nc listens on
    """
    if logfilename is None:
        return
    cmd = ['nc', '-u', '-l', '-p', str(port)]
    logfile = open(logfilename, "w")
    try:
        logger = subprocess.Popen(cmd, stdout=logfile)
    finally:
        # the child has its own copy of the descriptor; do not
        # keep the file open in this process
        logfile.close()
    self.netlogger_pid = logger.pid
153
154
def __load_netconsole_module(self):
    """
    Try to load the netconsole module on the remote machine.

    A failure is ignored: loading can fail on a perfectly healthy
    machine when netconsole is already compiled into the kernel
    and started.
    """
    modprobe_cmd = 'modprobe netconsole %s' % self.__netconsole_param
    try:
        self.run(modprobe_cmd)
    except errors.AutoservRunError:
        # nothing more can be done here, carry on
        pass
168
169
def __unload_netconsole_module(self):
    """
    Try to remove the netconsole module on the remote machine,
    ignoring a failure of the remote command.
    """
    unload_cmd = 'modprobe -r netconsole'
    try:
        self.run(unload_cmd)
    except errors.AutoservRunError:
        # removal is best effort
        pass
mbligh3409ee72007-10-16 23:58:33 +0000175
176
def __wait_for_restart(self, timeout):
    """
    Wait for the machine to go down and come back up.

    Args:
        timeout: time limit in seconds passed to each wait_up()
            call
    """
    # Make sure he's dead, Jim
    self.wait_down(60)
    self.wait_up(timeout)
    # check a second time after a short pause; this is needed for
    # complete reliability
    time.sleep(2)
    self.wait_up(timeout)
    print("Reboot complete")
183
184
def hardreset(self, timeout=600, wait=True):
    """
    Reach out and slap the box in the power switch

    Sends the '~$hardreset' sequence to the conmux console.

    Args:
        timeout: time limit in seconds used while waiting for
            the machine to come back
        wait: when true, wait for the machine to restart before
            returning

    Returns:
        the result of sending the console command
    """
    sent = self.__console_run(r"'~$hardreset'")
    if not wait:
        return sent
    self.__wait_for_restart(timeout)
    return sent
193
194
def __start_console_log(self, logfilename):
    """
    Log the output of the console session to a specified file

    Starts conmux-attach in its own process group and records
    its pid in self.logger_pid. Does nothing when logfilename is
    None or when the conmux-attach program is not available.

    Args:
        logfilename: file to write the console output to
    """
    if logfilename is None:
        return
    if not self.conmux_attach or not os.path.exists(self.conmux_attach):
        return
    if self.conmux_server:
        to = '%s/%s' % (self.conmux_server, self.hostname)
    else:
        to = self.hostname
    cmd = [self.conmux_attach, to, 'cat - > %s' % logfilename]
    devnull = open('/dev/null', 'w')
    try:
        # own process group so that __del__ can kill the logger
        # together with anything it spawned
        logger = subprocess.Popen(cmd,
                                  stderr=devnull,
                                  preexec_fn=lambda: os.setpgid(0, 0))
    finally:
        # the child has its own copy of the descriptor
        devnull.close()
    self.logger_pid = logger.pid
212
213
def __find_console_attach(self, conmux_attach):
    """
    Locate the conmux-attach program.

    Args:
        conmux_attach: explicit path; returned unchanged when
            it is set

    Returns:
        the path of conmux-attach, or None (after printing a
        warning) when it cannot be found
    """
    if conmux_attach:
        return conmux_attach
    # first choice: whatever is on the PATH
    try:
        which = utils.run('which conmux-attach')
        if which.exit_status == 0:
            return which.stdout.strip()
    except errors.AutoservRunError:
        pass
    # otherwise probe the known install locations in order
    candidates = [
        os.path.join(self.serverdir, '..', 'conmux', 'conmux-attach'),
        os.path.join(self.serverdir, '..', 'autotest',
                     'conmux', 'conmux-attach'),
        '/usr/local/conmux/bin/conmux-attach',
        '/usr/bin/conmux-attach',
    ]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate

    print("WARNING: conmux-attach not found on autoserv server")
    return None
238
239
def __console_run(self, cmd):
    """
    Send a command to the conmux session

    Args:
        cmd: text echoed into the console session

    Returns:
        True if the conmux-attach command exited with status 0,
        False otherwise or when conmux-attach is not available
    """
    attach = self.conmux_attach
    if not (attach and os.path.exists(attach)):
        return False
    target = self.hostname
    if self.conmux_server:
        target = '%s/%s' % (self.conmux_server, self.hostname)
    shell_cmd = '%s %s echo %s 2> /dev/null' % (attach, target, cmd)
    return os.system(shell_cmd) == 0
mbligh7d2bde82007-08-02 16:26:10 +0000255
256
def ssh_command(self):
    """
    Construct an ssh command with proper args for this host.

    Returns:
        the ssh command line prefix (base command, login user,
        port and hostname) as a string
    """
    parts = (self.SSH_BASE_COMMAND, self.user, self.port, self.hostname)
    return r'%s -l %s -p %d %s' % parts
mblighe6647d12007-10-17 00:00:01 +0000263
264
def run(self, command, timeout=None, ignore_status=False):
    """
    Run a command on the remote host.

    The command is escaped, quoted and appended to the ssh
    command line of this host, then handed to utils.run().

    Args:
        command: the command line string
        timeout: time limit in seconds before attempting to
            kill the running process. The run() function
            will take a few seconds longer than 'timeout'
            to complete if it has to kill the process.
        ignore_status: do not raise an exception, no matter
            what the exit code of the command is.

    Returns:
        a hosts.base_classes.CmdResult object

    Raises:
        AutoservRunError: the exit code of the command
            execution was not 0
    """
    ssh_prefix = self.ssh_command()
    escaped = utils.sh_escape(command)
    full_command = r'%s "%s"' % (ssh_prefix, escaped)
    return utils.run(full_command, timeout, ignore_status)
mbligh7d2bde82007-08-02 16:26:10 +0000290
291
def reboot(self, timeout=600, label=None, kernel_args=None, wait=True):
    """
    Reboot the remote host.

    Args:
        timeout: time limit in seconds used while waiting for
            the machine to come back
        label: bootloader entry to make the default before
            rebooting
        kernel_args: extra kernel arguments added to the boot
            entry; the netconsole parameter, when one was
            determined, is appended to them
        wait: when true, wait for the machine to restart and
            then try to load the netconsole module again
    """
    # forcibly include the "netconsole" kernel arg
    netconsole_arg = self.__netconsole_param
    if netconsole_arg:
        if kernel_args is None:
            kernel_args = netconsole_arg
        else:
            kernel_args += " " + netconsole_arg
        # unload the (possibly loaded) module to avoid shutdown issues
        self.__unload_netconsole_module()
    if label or kernel_args:
        self.bootloader.install_boottool()
    if label:
        self.bootloader.set_default(label)
    if kernel_args:
        if not label:
            # no label given: add the args to the current default entry
            default_index = int(self.bootloader.get_default())
            titles = self.bootloader.get_titles()
            label = titles[default_index]
        self.bootloader.add_args(label, kernel_args)
    print("Reboot: initiating reboot")
    self.run('reboot')
    if not wait:
        return
    self.__wait_for_restart(timeout)
    # if the builtin fails
    self.__load_netconsole_module()
mbligha0452c82007-08-08 20:24:57 +0000321
mbligh7d2bde82007-08-02 16:26:10 +0000322
def get_file(self, source, dest):
    """
    Copy files from the remote host to a local path.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    processed_source = []
    for entry in source:
        if entry.endswith('/'):
            # trailing slash: copy the content of the directory
            format_string = '%s@%s:"%s*"'
        else:
            format_string = '%s@%s:"%s"'
        entry = format_string % (self.user, self.hostname,
                                 utils.scp_remote_escape(entry))
        processed_source.append(entry)

    processed_dest = os.path.abspath(dest)
    if os.path.isdir(dest):
        processed_dest = "%s/" % (utils.sh_escape(processed_dest),)
    else:
        processed_dest = utils.sh_escape(processed_dest)

    # pass the ssh port explicitly so that hosts listening on a
    # non-default port work, as they do with ssh_command()
    utils.run('scp -rpq -P %d %s "%s"' % (
        self.port,
        " ".join(processed_source),
        processed_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000367
368
def send_file(self, source, dest):
    """
    Copy files from a local path to the remote host.

    rsync is used when "rsync -h" succeeds on the remote host,
    scp otherwise.

    Directories will be copied recursively.
    If a source component is a directory with a trailing slash,
    the content of the directory will be copied, otherwise, the
    directory itself and its content will be copied. This
    behavior is similar to that of the program 'rsync'.

    Args:
        source: either
            1) a single file or directory, as a string
            2) a list of one or more (possibly mixed)
                files or directories
        dest: a file or a directory (if source contains a
            directory or more than one element, you must
            supply a directory dest)

    Raises:
        AutoservRunError: the rsync or scp command failed
    """
    if isinstance(source, types.StringTypes):
        source = [source]

    processed_source = []
    for entry in source:
        if entry.endswith('/'):
            # trailing slash: copy the content of the directory
            format_string = '"%s/"*'
        else:
            format_string = '"%s"'
        entry = format_string % (utils.sh_escape(os.path.abspath(entry)),)
        processed_source.append(entry)

    # probe the remote host for rsync
    result = utils.run(r'%s rsync -h' % self.ssh_command(),
                       ignore_status=True)

    remote_dest = '%s@%s:"%s"' % (
        self.user, self.hostname,
        utils.scp_remote_escape(dest))
    # Both transports get the ssh port explicitly so that hosts
    # listening on a non-default port work, as with ssh_command()
    if result.exit_status == 0:
        utils.run('rsync --rsh="%s -p %d" -az %s %s' % (
            self.SSH_BASE_COMMAND, self.port,
            " ".join(processed_source),
            remote_dest))
    else:
        utils.run('scp -rpq -P %d %s %s' % (
            self.port,
            " ".join(processed_source),
            remote_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000417
def get_tmp_dir(self):
    """
    Create a directory on the host suitable for temporary file
    storage and return its pathname.

    The directory is remembered in self.tmp_dirs; it and its
    content are deleted automatically on the destruction of the
    Host object that was used to obtain it.
    """
    mktemp_result = self.run("mktemp -d /tmp/autoserv-XXXXXX")
    tmp_path = mktemp_result.stdout.rstrip(" \n")
    self.tmp_dirs.append(tmp_path)
    return tmp_path
mbligh7d2bde82007-08-02 16:26:10 +0000430
431
def is_up(self):
    """
    Check if the remote host is up.

    The check runs "true" on the host with a 10 second timeout.

    Returns:
        True if the remote host is up, False otherwise
    """
    try:
        probe = self.run("true", timeout=10)
    except errors.AutoservRunError:
        return False
    return probe.exit_status == 0
mbligh7d2bde82007-08-02 16:26:10 +0000449
def wait_up(self, timeout=None):
    """
    Wait until the remote host is up or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host can be established.

    Args:
        timeout: time limit in seconds before returning even
            if the host is not up. A false value (None or 0)
            means wait without limit.

    Returns:
        True if the host was found to be up, False otherwise
    """
    deadline = None
    if timeout:
        deadline = time.time() + timeout

    while deadline is None or time.time() < deadline:
        try:
            probe = self.run("true", timeout=10)
        except errors.AutoservRunError:
            # not reachable yet, try again
            pass
        else:
            if probe.exit_status == 0:
                return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000479
480
def wait_down(self, timeout=None):
    """
    Wait until the remote host is down or the timeout expires.

    In fact, it will wait until an ssh connection to the remote
    host fails.

    Args:
        timeout: time limit in seconds before returning even
            if the host is not down. A false value (None or 0)
            means wait without limit.

    Returns:
        True if the host was found to be down, False otherwise
    """
    deadline = None
    if timeout:
        deadline = time.time() + timeout

    while deadline is None or time.time() < deadline:
        try:
            probe = self.run("true", timeout=10)
        except errors.AutoservRunError:
            return True
        else:
            # a probe that had to be aborted also counts as down
            if probe.aborted:
                return True
        time.sleep(1)

    return False
mbligh7d2bde82007-08-02 16:26:10 +0000510
511
def ensure_up(self):
    """
    Ensure the host is up; if it is not then do not proceed.
    This prevents cascading failures of tests.

    When the host does not come up within 300 seconds and a
    hardreset method is available, a hard reset is performed.
    """
    print('Ensuring that %s is up before continuing' % self.hostname)
    needs_reset = hasattr(self, 'hardreset') and not self.wait_up(300)
    if needs_reset:
        print("Performing a hardreset on %s" % self.hostname)
        self.hardreset()
    self.wait_up()
    print('Host up, continuing')
mbligh7d2bde82007-08-02 16:26:10 +0000523
524
def get_num_cpu(self):
    """
    Get the number of CPUs in the host according to
    /proc/cpuinfo.

    Returns:
        The number of CPUs, counted as the lines of
        /proc/cpuinfo that start with 'processor'
    """
    cpuinfo_text = self.run("cat /proc/cpuinfo").stdout
    processor_lines = [entry for entry in cpuinfo_text.splitlines()
                       if entry.startswith('processor')]
    return len(processor_lines)
mbligh5f876ad2007-10-12 23:59:53 +0000540
541
def check_uptime(self):
    """
    Check that the host is pingable and read its uptime.

    Returns:
        the first field of /proc/uptime on the remote machine,
        as a string

    Raises:
        RuntimeError: the host does not answer to ping
    """
    if not self.ping():
        # raise a real exception object: string exceptions are
        # rejected by Python 2.6 and later
        raise RuntimeError("Client is not pingable")
    result = self.run("/bin/cat /proc/uptime", 30)
    return result.stdout.strip().split()[0]
550
551
def get_arch(self):
    """
    Get the hardware architecture of the remote machine

    Returns:
        the output of "uname -m", with every iN86 variant
        reported as 'i386'
    """
    machine = self.run('/bin/uname -m').stdout.rstrip()
    if re.match(r'i\d86$', machine):
        return 'i386'
    return machine
560
561
def get_kernel_ver(self):
    """
    Get the kernel version of the remote machine

    Returns:
        the output of "uname -r" without trailing whitespace
    """
    uname_result = self.run('/bin/uname -r')
    return uname_result.stdout.rstrip()
567
568
def get_cmdline(self):
    """
    Get the kernel command line of the remote machine

    Returns:
        the content of /proc/cmdline without trailing
        whitespace
    """
    cmdline_result = self.run('cat /proc/cmdline')
    return cmdline_result.stdout.rstrip()
574
575
def ping(self):
    """
    Ping the remote system, and return whether it's available

    Returns:
        True if the fping command exited with status 0,
        False otherwise
    """
    fping_cmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
    exit_code = utils.system(fping_cmd, ignore_status = 1)
    return exit_code == 0