blob: e3874b439f3732d8ae71d6d9401de8fcf7d1b863 [file] [log] [blame]
mblighdcd57a82007-07-11 23:06:47 +00001#!/usr/bin/python
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
mbligh7d2bde82007-08-02 16:26:10 +00005"""
6This module defines the SSHHost class.
mblighdcd57a82007-07-11 23:06:47 +00007
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11 SSHHost: a remote machine with a ssh access
12"""
13
mbligh7d2bde82007-08-02 16:26:10 +000014__author__ = """
15mbligh@google.com (Martin J. Bligh),
mblighdcd57a82007-07-11 23:06:47 +000016poirier@google.com (Benjamin Poirier),
mbligh7d2bde82007-08-02 16:26:10 +000017stutsman@google.com (Ryan Stutsman)
18"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh5f876ad2007-10-12 23:59:53 +000022import base_classes, utils, errors, bootloader
mblighdcd57a82007-07-11 23:06:47 +000023
24
25class SSHHost(base_classes.RemoteHost):
mbligh7d2bde82007-08-02 16:26:10 +000026 """
27 This class represents a remote machine controlled through an ssh
mblighdcd57a82007-07-11 23:06:47 +000028 session on which you can run programs.
mbligh7d2bde82007-08-02 16:26:10 +000029
mblighdcd57a82007-07-11 23:06:47 +000030 It is not the machine autoserv is running on. The machine must be
31 configured for password-less login, for example through public key
32 authentication.
mbligh7d2bde82007-08-02 16:26:10 +000033
mbligh3409ee72007-10-16 23:58:33 +000034 It includes support for controlling the machine through a serial
35 console on which you can run programs. If such a serial console is
36 set up on the machine then capabilities such as hard reset and
37 boot strap monitoring are available. If the machine does not have a
38 serial console available then ordinary SSH-based commands will
39 still be available, but attempts to use extensions such as
40 console logging or hard reset will fail silently.
41
mblighdcd57a82007-07-11 23:06:47 +000042 Implementation details:
43 This is a leaf class in an abstract class hierarchy, it must
44 implement the unimplemented methods in parent classes.
45 """
mbligh7d2bde82007-08-02 16:26:10 +000046
mbligh0faf91f2007-10-18 03:10:48 +000047 SSH_BASE_COMMAND = 'ssh -a'
48
mblighde384372007-10-17 04:25:37 +000049 def __init__(self, hostname, user="root", port=22, initialize=True,
50 conmux_log=None, conmux_server=None, conmux_attach=None,
51 netconsole_log=None, netconsole_port=6666):
mbligh7d2bde82007-08-02 16:26:10 +000052 """
53 Construct a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000054
55 Args:
56 hostname: network hostname or address of remote machine
57 user: user to log in as on the remote machine
58 port: port the ssh daemon is listening on on the remote
59 machine
60 """
mblighdcd57a82007-07-11 23:06:47 +000061 self.hostname= hostname
62 self.user= user
63 self.port= port
64 self.tmp_dirs= []
mbligh137a05c2007-10-04 15:56:51 +000065 self.initialize = initialize
mbligh91334902007-09-28 01:47:59 +000066
mbligh3409ee72007-10-16 23:58:33 +000067 self.conmux_server = conmux_server
68 self.conmux_attach = self.__find_console_attach(conmux_attach)
69 self.logger_pid = None
mblighde384372007-10-17 04:25:37 +000070 self.__start_console_log(conmux_log)
mbligh3409ee72007-10-16 23:58:33 +000071
mbligh91334902007-09-28 01:47:59 +000072 super(SSHHost, self).__init__()
mbligha0452c82007-08-08 20:24:57 +000073 self.bootloader = bootloader.Bootloader(self)
mbligh7d2bde82007-08-02 16:26:10 +000074
mblighde384372007-10-17 04:25:37 +000075 self.__init_netconsole_params(netconsole_port)
76 self.netlogger_pid = None
77 self.__start_netconsole_log(netconsole_log, netconsole_port)
78 self.__load_netconsole_module()
79
mbligh7d2bde82007-08-02 16:26:10 +000080
mblighdcd57a82007-07-11 23:06:47 +000081 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +000082 """
83 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000084 """
85 for dir in self.tmp_dirs:
86 try:
87 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
88 except errors.AutoservRunError:
89 pass
mblighde384372007-10-17 04:25:37 +000090 # kill the console logger
mbligh3409ee72007-10-16 23:58:33 +000091 if self.logger_pid:
92 try:
93 pgid = os.getpgid(self.logger_pid)
94 os.killpg(pgid, signal.SIGTERM)
95 except OSError:
96 pass
mblighde384372007-10-17 04:25:37 +000097 # kill the netconsole logger
98 if self.netlogger_pid:
99 try:
100 os.kill(self.netlogger_pid, signal.SIGTERM)
101 except OSError:
102 pass
103
104
105 def __init_netconsole_params(self, port):
106 """
107 Connect to the remote machine and determine the values to use for the
108 required netconsole parameters.
109 """
110 self.__netconsole_param = ""
111 # PROBLEM: on machines with multiple IPs this may not make any sense
112 # It also doesn't work with IPv6
113 remote_ip = socket.gethostbyname(self.hostname)
114 local_ip = socket.gethostbyname(socket.gethostname())
115 # Get the gateway of the remote machine
116 try:
117 traceroute = self.run('traceroute -n %s' % local_ip)
118 except errors.AutoservRunError:
119 return
120 first_node = traceroute.stdout.split("\n")[0]
121 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
122 if match:
123 router_ip = match.group(1)
124 else:
125 return
126 # Look up the MAC address of the gateway
127 try:
128 self.run('ping -c 1 %s' % router_ip)
129 arp = self.run('arp -n -a %s' % router_ip)
130 except errors.AutoservRunError:
131 return
132 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
133 if match:
134 gateway_mac = match.group(1)
135 else:
136 return
137 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
138 port,
139 local_ip,
140 gateway_mac)
141
142
143 def __start_netconsole_log(self, logfilename, port):
144 """
145 Log the output of netconsole to a specified file
146 """
147 if logfilename == None:
148 return
149 cmd = ['nc', '-u', '-l', '-p', str(port)]
150 logger = subprocess.Popen(cmd, stdout=open(logfilename, "w"))
151 self.netlogger_pid = logger.pid
152
153
154 def __load_netconsole_module(self):
155 """
156 Make a best effort to load the netconsole module.
157
158 Note that loading the module can fail even when the remote machine is
159 working correctly if netconsole is already compiled into the kernel
160 and started.
161 """
162 try:
163 self.run('modprobe netconsole %s' % self.__netconsole_param)
164 except errors.AutoservRunError:
165 # if it fails there isn't much we can do, just keep going
166 pass
167
168
169 def __unload_netconsole_module(self):
170 try:
171 self.run('modprobe -r netconsole')
172 except errors.AutoservRunError:
173 pass
mbligh3409ee72007-10-16 23:58:33 +0000174
175
176 def __wait_for_restart(self, timeout):
177 self.wait_down(60) # Make sure he's dead, Jim
178 self.wait_up(timeout)
179 time.sleep(2) # this is needed for complete reliability
180 self.wait_up(timeout)
181 print "Reboot complete"
182
183
184 def hardreset(self, timeout=600, wait=True):
185 """
186 Reach out and slap the box in the power switch
187 """
188 result = self.__console_run(r"'~$hardreset'")
189 if wait:
190 self.__wait_for_restart(timeout)
191 return result
192
193
194 def __start_console_log(self, logfilename):
195 """
196 Log the output of the console session to a specified file
197 """
198 if logfilename == None:
199 return
200 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
201 return
202 if self.conmux_server:
203 to = '%s/%s' % (self.conmux_server, self.hostname)
204 else:
205 to = self.hostname
206 cmd = [self.conmux_attach, to, 'cat - > %s' % logfilename]
207 logger = subprocess.Popen(cmd,
208 stderr=open('/dev/null', 'w'),
209 preexec_fn=lambda: os.setpgid(0, 0))
210 self.logger_pid = logger.pid
211
212
213 def __find_console_attach(self, conmux_attach):
214 if conmux_attach:
215 return conmux_attach
216 try:
217 res = utils.run('which conmux-attach')
218 if res.exit_status == 0:
219 return res.stdout.strip()
220 except errors.AutoservRunError, e:
221 pass
222 autoserv_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
223 autotest_conmux = os.path.join(autoserv_dir, '..',
224 'conmux', 'conmux-attach')
225 autotest_conmux_alt = os.path.join(autoserv_dir,
226 '..', 'autotest',
227 'conmux', 'conmux-attach')
228 locations = [autotest_conmux,
229 autotest_conmux_alt,
230 '/usr/local/conmux/bin/conmux-attach',
231 '/usr/bin/conmux-attach']
232 for l in locations:
233 if os.path.exists(l):
234 return l
235
236 print "WARNING: conmux-attach not found on autoserv server"
237 return None
238
239
240 def __console_run(self, cmd):
241 """
242 Send a command to the conmux session
243 """
244 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
245 return False
246 if self.conmux_server:
247 to = '%s/%s' % (self.conmux_server, self.hostname)
248 else:
249 to = self.hostname
250 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
251 to,
252 cmd)
253 result = os.system(cmd)
254 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000255
256
mblighe6647d12007-10-17 00:00:01 +0000257 def ssh_command(self):
258 """Construct an ssh command with proper args for this host."""
mbligh0faf91f2007-10-18 03:10:48 +0000259 return r'%s -l %s -p %d %s' % (self.SSH_BASE_COMMAND,
260 self.user,
261 self.port,
262 self.hostname)
mblighe6647d12007-10-17 00:00:01 +0000263
264
mblighcf965b02007-07-25 16:49:45 +0000265 def run(self, command, timeout=None, ignore_status=False):
mbligh7d2bde82007-08-02 16:26:10 +0000266 """
267 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000268
269 Args:
270 command: the command line string
271 timeout: time limit in seconds before attempting to
272 kill the running process. The run() function
273 will take a few seconds longer than 'timeout'
274 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000275 ignore_status: do not raise an exception, no matter
276 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000277
278 Returns:
279 a hosts.base_classes.CmdResult object
280
281 Raises:
282 AutoservRunError: the exit code of the command
283 execution was not 0
284 """
285 #~ print "running %s" % (command,)
mblighe6647d12007-10-17 00:00:01 +0000286 result= utils.run(r'%s "%s"' % (self.ssh_command(),
287 utils.sh_escape(command)),
288 timeout, ignore_status)
mblighdcd57a82007-07-11 23:06:47 +0000289 return result
mbligh7d2bde82007-08-02 16:26:10 +0000290
291
mbligha0452c82007-08-08 20:24:57 +0000292 def reboot(self, timeout=600, label=None, kernel_args=None, wait=True):
mbligh7d2bde82007-08-02 16:26:10 +0000293 """
294 Reboot the remote host.
mbligh8b85dfb2007-08-28 09:50:31 +0000295
mbligha0452c82007-08-08 20:24:57 +0000296 Args:
297 timeout
mbligh8b85dfb2007-08-28 09:50:31 +0000298 """
mblighde384372007-10-17 04:25:37 +0000299 # forcibly include the "netconsole" kernel arg
300 if self.__netconsole_param:
301 if kernel_args is None:
302 kernel_args = self.__netconsole_param
303 else:
304 kernel_args += " " + self.__netconsole_param
305 # unload the (possibly loaded) module to avoid shutdown issues
306 self.__unload_netconsole_module()
mbligha0452c82007-08-08 20:24:57 +0000307 if label or kernel_args:
308 self.bootloader.install_boottool()
309 if label:
310 self.bootloader.set_default(label)
311 if kernel_args:
312 if not label:
313 default = int(self.bootloader.get_default())
314 label = self.bootloader.get_titles()[default]
315 self.bootloader.add_args(label, kernel_args)
mblighd742a222007-09-30 01:27:06 +0000316 print "Reboot: initiating reboot"
mbligha0452c82007-08-08 20:24:57 +0000317 self.run('reboot')
318 if wait:
mbligh3409ee72007-10-16 23:58:33 +0000319 self.__wait_for_restart(timeout)
mblighde384372007-10-17 04:25:37 +0000320 self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000321
mbligh7d2bde82007-08-02 16:26:10 +0000322
mblighdcd57a82007-07-11 23:06:47 +0000323 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000324 """
325 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000326
327 Directories will be copied recursively.
328 If a source component is a directory with a trailing slash,
329 the content of the directory will be copied, otherwise, the
330 directory itself and its content will be copied. This
331 behavior is similar to that of the program 'rsync'.
332
333 Args:
334 source: either
335 1) a single file or directory, as a string
336 2) a list of one or more (possibly mixed)
337 files or directories
338 dest: a file or a directory (if source contains a
339 directory or more than one element, you must
340 supply a directory dest)
341
342 Raises:
343 AutoservRunError: the scp command failed
344 """
345 if isinstance(source, types.StringTypes):
346 source= [source]
347
348 processed_source= []
349 for entry in source:
350 if entry.endswith('/'):
351 format_string= '%s@%s:"%s*"'
352 else:
353 format_string= '%s@%s:"%s"'
354 entry= format_string % (self.user, self.hostname,
355 utils.scp_remote_escape(entry))
356 processed_source.append(entry)
357
358 processed_dest= os.path.abspath(dest)
359 if os.path.isdir(dest):
360 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
361 else:
362 processed_dest= utils.sh_escape(processed_dest)
363
364 utils.run('scp -rpq %s "%s"' % (
365 " ".join(processed_source),
366 processed_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000367
368
mblighdcd57a82007-07-11 23:06:47 +0000369 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000370 """
371 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000372
373 Directories will be copied recursively.
374 If a source component is a directory with a trailing slash,
375 the content of the directory will be copied, otherwise, the
376 directory itself and its content will be copied. This
377 behavior is similar to that of the program 'rsync'.
378
379 Args:
380 source: either
381 1) a single file or directory, as a string
382 2) a list of one or more (possibly mixed)
383 files or directories
384 dest: a file or a directory (if source contains a
385 directory or more than one element, you must
386 supply a directory dest)
387
388 Raises:
389 AutoservRunError: the scp command failed
390 """
391 if isinstance(source, types.StringTypes):
392 source= [source]
393
394 processed_source= []
395 for entry in source:
396 if entry.endswith('/'):
397 format_string= '"%s/"*'
398 else:
399 format_string= '"%s"'
400 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
401 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000402
mblighe6647d12007-10-17 00:00:01 +0000403 result = utils.run(r'%s rsync -h' % self.ssh_command(),
404 ignore_status=True)
mblighd5669092007-08-27 19:01:05 +0000405
mbligh0faf91f2007-10-18 03:10:48 +0000406 remote_dest = '%s@%s:"%s"' % (
407 self.user, self.hostname,
408 utils.scp_remote_escape(dest))
mblighd5669092007-08-27 19:01:05 +0000409 if result.exit_status == 0:
mbligh0faf91f2007-10-18 03:10:48 +0000410 utils.run('rsync --rsh="%s" -az %s %s' % (
411 self.SSH_BASE_COMMAND, " ".join(processed_source),
412 remote_dest))
mblighd5669092007-08-27 19:01:05 +0000413 else:
mbligh0faf91f2007-10-18 03:10:48 +0000414 utils.run('scp -rpq %s %s' % (
415 " ".join(processed_source),
416 remote_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000417
mblighdcd57a82007-07-11 23:06:47 +0000418 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000419 """
420 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000421 for temporary file storage.
422
423 The directory and its content will be deleted automatically
424 on the destruction of the Host object that was used to obtain
425 it.
426 """
mbligha25b29e2007-08-26 13:58:04 +0000427 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000428 self.tmp_dirs.append(dir_name)
429 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000430
431
mblighdcd57a82007-07-11 23:06:47 +0000432 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000433 """
434 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000435
436 Returns:
437 True if the remote host is up, False otherwise
438 """
439 try:
440 result= self.run("true", timeout=10)
441 except errors.AutoservRunError:
442 return False
443 else:
444 if result.exit_status == 0:
445 return True
446 else:
mbligh7d2bde82007-08-02 16:26:10 +0000447
mblighdcd57a82007-07-11 23:06:47 +0000448 return False
mbligh7d2bde82007-08-02 16:26:10 +0000449
mblighdcd57a82007-07-11 23:06:47 +0000450 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000451 """
452 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000453
454 In fact, it will wait until an ssh connection to the remote
455 host can be established.
456
457 Args:
458 timeout: time limit in seconds before returning even
459 if the host is not up.
460
461 Returns:
462 True if the host was found to be up, False otherwise
463 """
464 if timeout:
465 end_time= time.time() + timeout
466
467 while not timeout or time.time() < end_time:
468 try:
mblighe9cf9d42007-08-31 08:56:00 +0000469 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000470 result= self.run("true", timeout=run_timeout)
471 except errors.AutoservRunError:
472 pass
473 else:
474 if result.exit_status == 0:
475 return True
476 time.sleep(1)
477
478 return False
mbligh7d2bde82007-08-02 16:26:10 +0000479
480
mblighdcd57a82007-07-11 23:06:47 +0000481 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000482 """
483 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000484
485 In fact, it will wait until an ssh connection to the remote
486 host fails.
487
488 Args:
489 timeout: time limit in seconds before returning even
490 if the host is not up.
491
492 Returns:
493 True if the host was found to be down, False otherwise
494 """
495 if timeout:
496 end_time= time.time() + timeout
497
498 while not timeout or time.time() < end_time:
499 try:
mbligh7e1e9642007-07-31 18:00:45 +0000500 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000501 result= self.run("true", timeout=run_timeout)
502 except errors.AutoservRunError:
503 return True
504 else:
505 if result.aborted:
506 return True
507 time.sleep(1)
508
509 return False
mbligh7d2bde82007-08-02 16:26:10 +0000510
511
mblighdbe4a382007-07-26 19:41:28 +0000512 def ensure_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000513 """
514 Ensure the host is up if it is not then do not proceed;
515 this prevents cacading failures of tests
516 """
mbligha0452c82007-08-08 20:24:57 +0000517 print 'Ensuring that %s is up before continuing' % self.hostname
518 if hasattr(self, 'hardreset') and not self.wait_up(300):
mblighdbe4a382007-07-26 19:41:28 +0000519 print "Performing a hardreset on %s" % self.hostname
520 self.hardreset()
521 self.wait_up()
mbligha0452c82007-08-08 20:24:57 +0000522 print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000523
524
mblighdcd57a82007-07-11 23:06:47 +0000525 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000526 """
527 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000528 /proc/cpuinfo.
529
530 Returns:
531 The number of CPUs
532 """
533
mbligh5f876ad2007-10-12 23:59:53 +0000534 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000535 cpus = 0
536 for line in proc_cpuinfo.splitlines():
537 if line.startswith('processor'):
538 cpus += 1
539 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000540
541
542 def check_uptime(self):
543 """
544 Check that uptime is available and monotonically increasing.
545 """
546 if not self.ping():
547 raise "Client is not pingable"
548 result = self.run("/bin/cat /proc/uptime", 30)
549 return result.stdout.strip().split()[0]
550
551
552 def get_arch(self):
553 """
554 Get the hardware architecture of the remote machine
555 """
556 arch = self.run('/bin/uname -m').stdout.rstrip()
557 if re.match(r'i\d86$', arch):
558 arch = 'i386'
559 return arch
560
561
562 def get_kernel_ver(self):
563 """
564 Get the kernel version of the remote machine
565 """
566 return self.run('/bin/uname -r').stdout.rstrip()
567
568
569 def get_cmdline(self):
570 """
571 Get the kernel command line of the remote machine
572 """
573 return self.run('cat /proc/cmdline').stdout.rstrip()
574
575
576 def ping(self):
577 """
578 Ping the remote system, and return whether it's available
579 """
580 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
581 rc = utils.system(fpingcmd, ignore_status = 1)
582 return (rc == 0)