Add support for running autoserv with a "--collect-crashinfo" flag
that tells autotest to run a job consisting ONLY of crashinfo
collection. This mode pulls back crash information, and if it is run
against the results directory of a crashed job it will also pull back
any client results it can find on the remote host.
Risk: Low
Visibility: Adds a new mode to autoserv for just doing crashinfo
collection.
Signed-off-by: John Admanski <jadmanski@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@2933 592f7852-d20e-0410-864c-8624ca9c26a4
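For illustration only (not part of this patch), the new mode might be
driven from a small wrapper along these lines. The --collect-crashinfo
flag is the one added here; the -m (machine) and -r (results directory)
options and the bare "autoserv" command name are assumptions about the
usual autoserv command line, not something this change defines.

    import subprocess

    def collect_crashinfo(hostname, results_dir):
        # Re-run autoserv against an existing (crashed) results directory,
        # asking it to do nothing but crashinfo collection for this host.
        # Assumed flags: -m selects the machine, -r the results directory.
        cmd = ["autoserv", "--collect-crashinfo",
               "-m", hostname, "-r", results_dir]
        subprocess.check_call(cmd)
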
diff --git a/server/hosts/remote.py b/server/hosts/remote.py
index b0fb8bc..2c4a79f 100644
--- a/server/hosts/remote.py
+++ b/server/hosts/remote.py
@@ -1,7 +1,7 @@
"""This class defines the Remote host class, mixing in the SiteHost class
if it is available."""
-import os, time
+import os, time, pickle, logging
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils, profiler
from autotest_lib.server.hosts import base_classes, bootloader
@@ -201,18 +201,19 @@
def get_crashinfo(self, test_start_time):
- print "Collecting crash information..."
+ logging.info("Collecting crash information...")
super(RemoteHost, self).get_crashinfo(test_start_time)
# wait for four hours, to see if the machine comes back up
current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
- print "Waiting four hours for %s to come up (%s)" % (self.hostname,
- current_time)
+ logging.info("Waiting four hours for %s to come up (%s)",
+ self.hostname, current_time)
if not self.wait_up(timeout=4*60*60):
- print "%s down, unable to collect crash info" % self.hostname
+ logging.warning("%s down, unable to collect crash info",
+ self.hostname)
return
else:
- print "%s is back up, collecting crash info" % self.hostname
+ logging.info("%s is back up, collecting crash info", self.hostname)
# find a directory to put the crashinfo into
if self.job:
@@ -226,26 +227,26 @@
# collect various log files
log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
for log in log_files:
- print "Collecting %s..." % log
+ logging.info("Collecting %s...", log)
try:
self.get_file(log, infodir)
except Exception:
- print "Collection of %s failed. Non-fatal, continuing." % log
+ logging.warning("Collection of %s failed", log)
# collect dmesg
- print "Collecting dmesg (saved to crashinfo/dmesg)..."
+ logging.info("Collecting dmesg (saved to crashinfo/dmesg)...")
devnull = open("/dev/null", "w")
try:
try:
result = self.run("dmesg", stdout_tee=devnull).stdout
file(os.path.join(infodir, "dmesg"), "w").write(result)
except Exception, e:
- print "crashinfo collection of dmesg failed with:\n%s" % e
+ logging.warning("Collection of dmesg failed:\n%s", e)
finally:
devnull.close()
# collect any profiler data we can find
- print "Collecting any server-side profiler data lying around..."
+ logging.info("Collecting any server-side profiler data lying around...")
try:
cmd = "ls %s" % profiler.PROFILER_TMPDIR
profiler_dirs = [path for path in self.run(cmd).stdout.split()
@@ -260,7 +261,21 @@
os.mkdir(local_path)
self.get_file(remote_path + "/", local_path)
except Exception, e:
- print "crashinfo collection of profiler data failed with:\n%s" % e
+ logging.warning("Collection of profiler data failed with:\n%s", e)
+
+
+ # collect any uncollected logs we see (for this host)
+ if self.job and os.path.exists(self.job.uncollected_log_file):
+ try:
+ logs = pickle.load(open(self.job.uncollected_log_file))
+ for hostname, remote_path, local_path in logs:
+ if hostname == self.hostname:
+ logging.info("Retrieving logs from %s:%s into %s",
+ hostname, remote_path, local_path)
+ self.get_file(remote_path + "/", local_path + "/")
+ except Exception, e:
+ logging.warning("Error while trying to collect stranded "
+ "Autotest client logs: %s", e)
def are_wait_up_processes_up(self):
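
As a sketch of what the new block in get_crashinfo() assumes (how the
file gets written is outside this diff): job.uncollected_log_file is
expected to hold a pickled list of (hostname, remote_path, local_path)
tuples for client logs that could not be fetched earlier. The helper
below is a hypothetical example of producing that format, not code from
the autotest tree.

    import os, pickle

    def record_uncollected_log(log_file, hostname, remote_path, local_path):
        # Append one pending client-log entry in the format that
        # get_crashinfo() reads back with pickle.load().
        logs = []
        if os.path.exists(log_file):
            logs = pickle.load(open(log_file))
        logs.append((hostname, remote_path, local_path))
        pickle.dump(logs, open(log_file, "w"))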