Adds support to wait_down and wait_for_restart to watch for changed
boot_id values (using /proc/sys/kernel/random/boot_id). This avoids a
race condition: if a machine managed to shut down and boot back up
after a reboot was initiated but before we started polling for the
shutdown, it would appear that the machine had failed to shut down
when told.
As a result, the Host.wait_down method now treats the case where a
machine is NOT down but has a new boot_id as being "down", because
this implies that the machine did shut down (and subsequently came
back up). This does mean that you cannot assume that a successful
wait_down implies that the machine is now down, but that was never
true anyway.
Risk: High
Visibility: We can now reliably detect the restart of machines with
very fast restart times.
Signed-off-by: John Admanski <jadmanski@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@4096 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/server/hosts/serial.py b/server/hosts/serial.py
index c9580b6..9b4bdb2 100644
--- a/server/hosts/serial.py
+++ b/server/hosts/serial.py
@@ -128,6 +128,15 @@
wait_for_restart()
"""
conmux_command = "'~$%s'" % conmux_command
+
+ # if the machine is up, grab the old boot id, otherwise use a dummy
+ # string and NOT None to ensure that wait_down always returns True,
+ # even if the machine comes back up before it's called
+ try:
+ old_boot_id = self.get_boot_id()
+ except error.AutoservSSHTimeout:
+ old_boot_id = 'unknown boot_id prior to SerialHost.hardreset'
+
def reboot():
if not self.run_conmux(conmux_command):
self.record("ABORT", None, "reboot.start",
@@ -141,9 +150,12 @@
for attempt in xrange(num_attempts-1):
try:
self.wait_for_restart(timeout, log_failure=False,
+ old_boot_id=old_boot_id,
**wait_for_restart_kwargs)
except error.AutoservShutdownError:
logging.warning(warning_msg, attempt+1, num_attempts)
+ # re-send the hard reset command
+ self.run_conmux(conmux_command)
else:
break
else: