blob: edf0c45b6c87f5dcaca37f5a83ae899b49cc76d2 [file] [log] [blame]
#!/usr/bin/python
#
# Copyright 2007 Google Inc. Released under the GPL v2

"""
This module defines the SSHHost class.

Implementation details:
You should import the "hosts" package instead of importing each type of host.

        SSHHost: a remote machine with ssh access
"""

__author__ = """
mbligh@google.com (Martin J. Bligh),
poirier@google.com (Benjamin Poirier),
stutsman@google.com (Ryan Stutsman)
"""
mblighdcd57a82007-07-11 23:06:47 +000019
20
mblighde384372007-10-17 04:25:37 +000021import types, os, sys, signal, subprocess, time, re, socket
mbligh5f876ad2007-10-12 23:59:53 +000022import base_classes, utils, errors, bootloader
mblighdcd57a82007-07-11 23:06:47 +000023
24
class SSHHost(base_classes.RemoteHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console (reached with conmux-attach) on which you can run programs.
    If such a serial console is set up on the machine then capabilities
    such as hard reset and boot strap monitoring are available. If the
    machine does not have a serial console available then ordinary
    SSH-based commands will still be available, but attempts to use
    extensions such as console logging or hard reset will fail silently.

    The kernel's netconsole output can additionally be captured to a
    local file when a netconsole log file name is given.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """
mbligh7d2bde82007-08-02 16:26:10 +000046
    def __init__(self, hostname, user="root", port=22, initialize=True,
                 conmux_log=None, conmux_server=None, conmux_attach=None,
                 netconsole_log=None, netconsole_port=6666):
        """
        Construct a SSHHost object

        Besides storing its arguments, the constructor starts the optional
        console and netconsole loggers and runs commands on the remote
        machine (to work out the netconsole parameters and to load the
        netconsole module).

        Args:
                hostname: network hostname or address of remote machine
                user: user to log in as on the remote machine
                port: port the ssh daemon is listening on on the remote
                        machine
                initialize: stored as self.initialize; not otherwise
                        used by this constructor
                conmux_log: name of the file the serial console output
                        is logged to, or None to disable console logging
                conmux_server: optional conmux server; when set the
                        console is addressed as "<server>/<hostname>"
                conmux_attach: path of the conmux-attach program; when
                        empty a set of well-known locations is searched
                netconsole_log: name of the file the netconsole output
                        is logged to, or None to disable that logging
                netconsole_port: UDP port the local netconsole listener
                        uses and the remote kernel is told to send to
        """
        self.hostname= hostname
        self.user= user
        self.port= port
        # remote temporary directories to remove in __del__
        self.tmp_dirs= []
        self.initialize = initialize

        self.conmux_server = conmux_server
        self.conmux_attach = self.__find_console_attach(conmux_attach)
        # pid of the local console logger process, if one gets started
        self.logger_pid = None
        self.__start_console_log(conmux_log)

        super(SSHHost, self).__init__()
        self.bootloader = bootloader.Bootloader(self)

        # the netconsole setup runs commands over ssh, so it must come
        # after hostname/user/port have been set
        self.__init_netconsole_params(netconsole_port)
        # pid of the local netconsole listener, if one gets started
        self.netlogger_pid = None
        self.__start_netconsole_log(netconsole_log, netconsole_port)
        self.__load_netconsole_module()
77
mbligh7d2bde82007-08-02 16:26:10 +000078
mblighdcd57a82007-07-11 23:06:47 +000079 def __del__(self):
mbligh7d2bde82007-08-02 16:26:10 +000080 """
81 Destroy a SSHHost object
mblighdcd57a82007-07-11 23:06:47 +000082 """
83 for dir in self.tmp_dirs:
84 try:
85 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
86 except errors.AutoservRunError:
87 pass
mblighde384372007-10-17 04:25:37 +000088 # kill the console logger
mbligh3409ee72007-10-16 23:58:33 +000089 if self.logger_pid:
90 try:
91 pgid = os.getpgid(self.logger_pid)
92 os.killpg(pgid, signal.SIGTERM)
93 except OSError:
94 pass
mblighde384372007-10-17 04:25:37 +000095 # kill the netconsole logger
96 if self.netlogger_pid:
97 try:
98 os.kill(self.netlogger_pid, signal.SIGTERM)
99 except OSError:
100 pass
101
102
103 def __init_netconsole_params(self, port):
104 """
105 Connect to the remote machine and determine the values to use for the
106 required netconsole parameters.
107 """
108 self.__netconsole_param = ""
109 # PROBLEM: on machines with multiple IPs this may not make any sense
110 # It also doesn't work with IPv6
111 remote_ip = socket.gethostbyname(self.hostname)
112 local_ip = socket.gethostbyname(socket.gethostname())
113 # Get the gateway of the remote machine
114 try:
115 traceroute = self.run('traceroute -n %s' % local_ip)
116 except errors.AutoservRunError:
117 return
118 first_node = traceroute.stdout.split("\n")[0]
119 match = re.search(r'\s+((\d+\.){3}\d+)\s+', first_node)
120 if match:
121 router_ip = match.group(1)
122 else:
123 return
124 # Look up the MAC address of the gateway
125 try:
126 self.run('ping -c 1 %s' % router_ip)
127 arp = self.run('arp -n -a %s' % router_ip)
128 except errors.AutoservRunError:
129 return
130 match = re.search(r'\s+(([0-9A-F]{2}:){5}[0-9A-F]{2})\s+', arp.stdout)
131 if match:
132 gateway_mac = match.group(1)
133 else:
134 return
135 self.__netconsole_param = 'netconsole=@%s/,%s@%s/%s' % (remote_ip,
136 port,
137 local_ip,
138 gateway_mac)
139
140
141 def __start_netconsole_log(self, logfilename, port):
142 """
143 Log the output of netconsole to a specified file
144 """
145 if logfilename == None:
146 return
147 cmd = ['nc', '-u', '-l', '-p', str(port)]
148 logger = subprocess.Popen(cmd, stdout=open(logfilename, "w"))
149 self.netlogger_pid = logger.pid
150
151
152 def __load_netconsole_module(self):
153 """
154 Make a best effort to load the netconsole module.
155
156 Note that loading the module can fail even when the remote machine is
157 working correctly if netconsole is already compiled into the kernel
158 and started.
159 """
160 try:
161 self.run('modprobe netconsole %s' % self.__netconsole_param)
162 except errors.AutoservRunError:
163 # if it fails there isn't much we can do, just keep going
164 pass
165
166
167 def __unload_netconsole_module(self):
168 try:
169 self.run('modprobe -r netconsole')
170 except errors.AutoservRunError:
171 pass
mbligh3409ee72007-10-16 23:58:33 +0000172
173
174 def __wait_for_restart(self, timeout):
175 self.wait_down(60) # Make sure he's dead, Jim
176 self.wait_up(timeout)
177 time.sleep(2) # this is needed for complete reliability
178 self.wait_up(timeout)
179 print "Reboot complete"
180
181
182 def hardreset(self, timeout=600, wait=True):
183 """
184 Reach out and slap the box in the power switch
185 """
186 result = self.__console_run(r"'~$hardreset'")
187 if wait:
188 self.__wait_for_restart(timeout)
189 return result
190
191
192 def __start_console_log(self, logfilename):
193 """
194 Log the output of the console session to a specified file
195 """
196 if logfilename == None:
197 return
198 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
199 return
200 if self.conmux_server:
201 to = '%s/%s' % (self.conmux_server, self.hostname)
202 else:
203 to = self.hostname
204 cmd = [self.conmux_attach, to, 'cat - > %s' % logfilename]
205 logger = subprocess.Popen(cmd,
206 stderr=open('/dev/null', 'w'),
207 preexec_fn=lambda: os.setpgid(0, 0))
208 self.logger_pid = logger.pid
209
210
211 def __find_console_attach(self, conmux_attach):
212 if conmux_attach:
213 return conmux_attach
214 try:
215 res = utils.run('which conmux-attach')
216 if res.exit_status == 0:
217 return res.stdout.strip()
218 except errors.AutoservRunError, e:
219 pass
220 autoserv_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
221 autotest_conmux = os.path.join(autoserv_dir, '..',
222 'conmux', 'conmux-attach')
223 autotest_conmux_alt = os.path.join(autoserv_dir,
224 '..', 'autotest',
225 'conmux', 'conmux-attach')
226 locations = [autotest_conmux,
227 autotest_conmux_alt,
228 '/usr/local/conmux/bin/conmux-attach',
229 '/usr/bin/conmux-attach']
230 for l in locations:
231 if os.path.exists(l):
232 return l
233
234 print "WARNING: conmux-attach not found on autoserv server"
235 return None
236
237
238 def __console_run(self, cmd):
239 """
240 Send a command to the conmux session
241 """
242 if not self.conmux_attach or not os.path.exists(self.conmux_attach):
243 return False
244 if self.conmux_server:
245 to = '%s/%s' % (self.conmux_server, self.hostname)
246 else:
247 to = self.hostname
248 cmd = '%s %s echo %s 2> /dev/null' % (self.conmux_attach,
249 to,
250 cmd)
251 result = os.system(cmd)
252 return result == 0
mbligh7d2bde82007-08-02 16:26:10 +0000253
254
mblighe6647d12007-10-17 00:00:01 +0000255 def ssh_command(self):
256 """Construct an ssh command with proper args for this host."""
257 return r'ssh -a -l %s -p %d %s' % (self.user,
258 self.port,
259 self.hostname)
260
261
mblighcf965b02007-07-25 16:49:45 +0000262 def run(self, command, timeout=None, ignore_status=False):
mbligh7d2bde82007-08-02 16:26:10 +0000263 """
264 Run a command on the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000265
266 Args:
267 command: the command line string
268 timeout: time limit in seconds before attempting to
269 kill the running process. The run() function
270 will take a few seconds longer than 'timeout'
271 to complete if it has to kill the process.
mbligh8b85dfb2007-08-28 09:50:31 +0000272 ignore_status: do not raise an exception, no matter
273 what the exit code of the command is.
mblighdcd57a82007-07-11 23:06:47 +0000274
275 Returns:
276 a hosts.base_classes.CmdResult object
277
278 Raises:
279 AutoservRunError: the exit code of the command
280 execution was not 0
281 """
282 #~ print "running %s" % (command,)
mblighe6647d12007-10-17 00:00:01 +0000283 result= utils.run(r'%s "%s"' % (self.ssh_command(),
284 utils.sh_escape(command)),
285 timeout, ignore_status)
mblighdcd57a82007-07-11 23:06:47 +0000286 return result
mbligh7d2bde82007-08-02 16:26:10 +0000287
288
    def reboot(self, timeout=600, label=None, kernel_args=None, wait=True):
        """
        Reboot the remote host.

        Args:
                timeout: time limit in seconds used while waiting for
                        the machine to come back up
                label: boot entry to make the default before
                        rebooting; None keeps the current default
                kernel_args: extra kernel arguments to add to the boot
                        entry; the netconsole parameter, when known, is
                        appended to them
                wait: when true, block until the restart has been
                        waited for and then try to load the netconsole
                        module again
        """
        # forcibly include the "netconsole" kernel arg
        if self.__netconsole_param:
            if kernel_args is None:
                kernel_args = self.__netconsole_param
            else:
                kernel_args += " " + self.__netconsole_param
            # unload the (possibly loaded) module to avoid shutdown issues
            # NOTE(review): nesting of this call under the enclosing "if"
            # was reconstructed from a whitespace-mangled copy - confirm
            self.__unload_netconsole_module()
        if label or kernel_args:
            self.bootloader.install_boottool()
        if label:
            self.bootloader.set_default(label)
        if kernel_args:
            if not label:
                # no label given: add the arguments to the current
                # default boot entry
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.add_args(label, kernel_args)
        print "Reboot: initiating reboot"
        self.run('reboot')
        if wait:
            self.__wait_for_restart(timeout)
            self.__load_netconsole_module() # if the builtin fails
mbligha0452c82007-08-08 20:24:57 +0000318
mbligh7d2bde82007-08-02 16:26:10 +0000319
mblighdcd57a82007-07-11 23:06:47 +0000320 def get_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000321 """
322 Copy files from the remote host to a local path.
mblighdcd57a82007-07-11 23:06:47 +0000323
324 Directories will be copied recursively.
325 If a source component is a directory with a trailing slash,
326 the content of the directory will be copied, otherwise, the
327 directory itself and its content will be copied. This
328 behavior is similar to that of the program 'rsync'.
329
330 Args:
331 source: either
332 1) a single file or directory, as a string
333 2) a list of one or more (possibly mixed)
334 files or directories
335 dest: a file or a directory (if source contains a
336 directory or more than one element, you must
337 supply a directory dest)
338
339 Raises:
340 AutoservRunError: the scp command failed
341 """
342 if isinstance(source, types.StringTypes):
343 source= [source]
344
345 processed_source= []
346 for entry in source:
347 if entry.endswith('/'):
348 format_string= '%s@%s:"%s*"'
349 else:
350 format_string= '%s@%s:"%s"'
351 entry= format_string % (self.user, self.hostname,
352 utils.scp_remote_escape(entry))
353 processed_source.append(entry)
354
355 processed_dest= os.path.abspath(dest)
356 if os.path.isdir(dest):
357 processed_dest= "%s/" % (utils.sh_escape(processed_dest),)
358 else:
359 processed_dest= utils.sh_escape(processed_dest)
360
361 utils.run('scp -rpq %s "%s"' % (
362 " ".join(processed_source),
363 processed_dest))
mbligh7d2bde82007-08-02 16:26:10 +0000364
365
mblighdcd57a82007-07-11 23:06:47 +0000366 def send_file(self, source, dest):
mbligh7d2bde82007-08-02 16:26:10 +0000367 """
368 Copy files from a local path to the remote host.
mblighdcd57a82007-07-11 23:06:47 +0000369
370 Directories will be copied recursively.
371 If a source component is a directory with a trailing slash,
372 the content of the directory will be copied, otherwise, the
373 directory itself and its content will be copied. This
374 behavior is similar to that of the program 'rsync'.
375
376 Args:
377 source: either
378 1) a single file or directory, as a string
379 2) a list of one or more (possibly mixed)
380 files or directories
381 dest: a file or a directory (if source contains a
382 directory or more than one element, you must
383 supply a directory dest)
384
385 Raises:
386 AutoservRunError: the scp command failed
387 """
388 if isinstance(source, types.StringTypes):
389 source= [source]
390
391 processed_source= []
392 for entry in source:
393 if entry.endswith('/'):
394 format_string= '"%s/"*'
395 else:
396 format_string= '"%s"'
397 entry= format_string % (utils.sh_escape(os.path.abspath(entry)),)
398 processed_source.append(entry)
mbligh7d2bde82007-08-02 16:26:10 +0000399
mblighe6647d12007-10-17 00:00:01 +0000400 result = utils.run(r'%s rsync -h' % self.ssh_command(),
401 ignore_status=True)
mblighd5669092007-08-27 19:01:05 +0000402
403 if result.exit_status == 0:
mblighd6dc1fc2007-09-11 21:21:02 +0000404 utils.run('rsync --rsh=ssh -az %s %s@%s:"%s"' % (
mblighd5669092007-08-27 19:01:05 +0000405 " ".join(processed_source), self.user,
406 self.hostname, utils.scp_remote_escape(dest)))
407 else:
408 utils.run('scp -rpq %s %s@%s:"%s"' % (
409 " ".join(processed_source), self.user,
410 self.hostname, utils.scp_remote_escape(dest)))
mbligh7d2bde82007-08-02 16:26:10 +0000411
mblighdcd57a82007-07-11 23:06:47 +0000412 def get_tmp_dir(self):
mbligh7d2bde82007-08-02 16:26:10 +0000413 """
414 Return the pathname of a directory on the host suitable
mblighdcd57a82007-07-11 23:06:47 +0000415 for temporary file storage.
416
417 The directory and its content will be deleted automatically
418 on the destruction of the Host object that was used to obtain
419 it.
420 """
mbligha25b29e2007-08-26 13:58:04 +0000421 dir_name= self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip(" \n")
mblighdcd57a82007-07-11 23:06:47 +0000422 self.tmp_dirs.append(dir_name)
423 return dir_name
mbligh7d2bde82007-08-02 16:26:10 +0000424
425
mblighdcd57a82007-07-11 23:06:47 +0000426 def is_up(self):
mbligh7d2bde82007-08-02 16:26:10 +0000427 """
428 Check if the remote host is up.
mblighdcd57a82007-07-11 23:06:47 +0000429
430 Returns:
431 True if the remote host is up, False otherwise
432 """
433 try:
434 result= self.run("true", timeout=10)
435 except errors.AutoservRunError:
436 return False
437 else:
438 if result.exit_status == 0:
439 return True
440 else:
mbligh7d2bde82007-08-02 16:26:10 +0000441
mblighdcd57a82007-07-11 23:06:47 +0000442 return False
mbligh7d2bde82007-08-02 16:26:10 +0000443
mblighdcd57a82007-07-11 23:06:47 +0000444 def wait_up(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000445 """
446 Wait until the remote host is up or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000447
448 In fact, it will wait until an ssh connection to the remote
449 host can be established.
450
451 Args:
452 timeout: time limit in seconds before returning even
453 if the host is not up.
454
455 Returns:
456 True if the host was found to be up, False otherwise
457 """
458 if timeout:
459 end_time= time.time() + timeout
460
461 while not timeout or time.time() < end_time:
462 try:
mblighe9cf9d42007-08-31 08:56:00 +0000463 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000464 result= self.run("true", timeout=run_timeout)
465 except errors.AutoservRunError:
466 pass
467 else:
468 if result.exit_status == 0:
469 return True
470 time.sleep(1)
471
472 return False
mbligh7d2bde82007-08-02 16:26:10 +0000473
474
mblighdcd57a82007-07-11 23:06:47 +0000475 def wait_down(self, timeout=None):
mbligh7d2bde82007-08-02 16:26:10 +0000476 """
477 Wait until the remote host is down or the timeout expires.
mblighdcd57a82007-07-11 23:06:47 +0000478
479 In fact, it will wait until an ssh connection to the remote
480 host fails.
481
482 Args:
483 timeout: time limit in seconds before returning even
484 if the host is not up.
485
486 Returns:
487 True if the host was found to be down, False otherwise
488 """
489 if timeout:
490 end_time= time.time() + timeout
491
492 while not timeout or time.time() < end_time:
493 try:
mbligh7e1e9642007-07-31 18:00:45 +0000494 run_timeout= 10
mblighdcd57a82007-07-11 23:06:47 +0000495 result= self.run("true", timeout=run_timeout)
496 except errors.AutoservRunError:
497 return True
498 else:
499 if result.aborted:
500 return True
501 time.sleep(1)
502
503 return False
mbligh7d2bde82007-08-02 16:26:10 +0000504
505
    def ensure_up(self):
        """
        Ensure the host is up; if it is not then do not proceed.
        This prevents cascading failures of tests.

        The host is given 300 seconds to answer; if it does not, a
        hard reset is attempted. The method then waits, without a time
        limit, until the host answers.
        """
        print 'Ensuring that %s is up before continuing' % self.hostname
        if hasattr(self, 'hardreset') and not self.wait_up(300):
            print "Performing a hardreset on %s" % self.hostname
            self.hardreset()
        # NOTE(review): placement of this call outside the "if" was
        # reconstructed from a whitespace-mangled copy - confirm
        self.wait_up()
        print 'Host up, continuing'
mbligh7d2bde82007-08-02 16:26:10 +0000517
518
mblighdcd57a82007-07-11 23:06:47 +0000519 def get_num_cpu(self):
mbligh7d2bde82007-08-02 16:26:10 +0000520 """
521 Get the number of CPUs in the host according to
mblighdcd57a82007-07-11 23:06:47 +0000522 /proc/cpuinfo.
523
524 Returns:
525 The number of CPUs
526 """
527
mbligh5f876ad2007-10-12 23:59:53 +0000528 proc_cpuinfo = self.run("cat /proc/cpuinfo").stdout
mblighdcd57a82007-07-11 23:06:47 +0000529 cpus = 0
530 for line in proc_cpuinfo.splitlines():
531 if line.startswith('processor'):
532 cpus += 1
533 return cpus
mbligh5f876ad2007-10-12 23:59:53 +0000534
535
536 def check_uptime(self):
537 """
538 Check that uptime is available and monotonically increasing.
539 """
540 if not self.ping():
541 raise "Client is not pingable"
542 result = self.run("/bin/cat /proc/uptime", 30)
543 return result.stdout.strip().split()[0]
544
545
546 def get_arch(self):
547 """
548 Get the hardware architecture of the remote machine
549 """
550 arch = self.run('/bin/uname -m').stdout.rstrip()
551 if re.match(r'i\d86$', arch):
552 arch = 'i386'
553 return arch
554
555
556 def get_kernel_ver(self):
557 """
558 Get the kernel version of the remote machine
559 """
560 return self.run('/bin/uname -r').stdout.rstrip()
561
562
563 def get_cmdline(self):
564 """
565 Get the kernel command line of the remote machine
566 """
567 return self.run('cat /proc/cmdline').stdout.rstrip()
568
569
570 def ping(self):
571 """
572 Ping the remote system, and return whether it's available
573 """
574 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
575 rc = utils.system(fpingcmd, ignore_status = 1)
576 return (rc == 0)