Add support for running autoserv with a "--collect-crashinfo" flag
that tells autotest to run a job consisting ONLY of crashinfo
collection. This mode pulls back crash information, and if it is run
against the results directory of a crashed job it will also pull back
any client results it can find on the remote host.
Risk: Low
Visibility: Adds a new mode to autoserv for just doing crashinfo
collection.
Signed-off-by: John Admanski <jadmanski@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@2933 592f7852-d20e-0410-864c-8624ca9c26a4
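For illustration only (not part of this patch), the new mode might be
driven from a small wrapper along these lines. The --collect-crashinfo
flag is the one added here; the -m (machine) and -r (results directory)
options and the bare "autoserv" command name are assumptions about the
usual autoserv command line, not something this change defines.

    import subprocess

    def collect_crashinfo(hostname, results_dir):
        # Re-run autoserv against an existing (crashed) results directory,
        # asking it to do nothing but crashinfo collection for this host.
        # Assumed flags: -m selects the machine, -r the results directory.
        cmd = ["autoserv", "--collect-crashinfo",
               "-m", hostname, "-r", results_dir]
        subprocess.check_call(cmd)
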
diff --git a/server/hosts/remote.py b/server/hosts/remote.py
index b0fb8bc..2c4a79f 100644
--- a/server/hosts/remote.py
+++ b/server/hosts/remote.py
@@ -1,7 +1,7 @@
"""This class defines the Remote host class, mixing in the SiteHost class
if it is available."""
-import os, time
+import os, time, pickle, logging
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils, profiler
from autotest_lib.server.hosts import base_classes, bootloader
@@ -201,18 +201,19 @@
def get_crashinfo(self, test_start_time):
- print "Collecting crash information..."
+ logging.info("Collecting crash information...")
super(RemoteHost, self).get_crashinfo(test_start_time)
# wait for four hours, to see if the machine comes back up
current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
- print "Waiting four hours for %s to come up (%s)" % (self.hostname,
- current_time)
+ logging.info("Waiting four hours for %s to come up (%s)",
+ self.hostname, current_time)
if not self.wait_up(timeout=4*60*60):
- print "%s down, unable to collect crash info" % self.hostname
+ logging.warning("%s down, unable to collect crash info",
+ self.hostname)
return
else:
- print "%s is back up, collecting crash info" % self.hostname
+ logging.info("%s is back up, collecting crash info", self.hostname)
# find a directory to put the crashinfo into
if self.job:
@@ -226,26 +227,26 @@
# collect various log files
log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
for log in log_files:
- print "Collecting %s..." % log
+ logging.info("Collecting %s...", log)
try:
self.get_file(log, infodir)
except Exception:
- print "Collection of %s failed. Non-fatal, continuing." % log
+ logging.warning("Collection of %s failed", log)
# collect dmesg
- print "Collecting dmesg (saved to crashinfo/dmesg)..."
+ logging.info("Collecting dmesg (saved to crashinfo/dmesg)...")
devnull = open("/dev/null", "w")
try:
try:
result = self.run("dmesg", stdout_tee=devnull).stdout
file(os.path.join(infodir, "dmesg"), "w").write(result)
except Exception, e:
- print "crashinfo collection of dmesg failed with:\n%s" % e
+ logging.warning("Collection of dmesg failed:\n%s", e)
finally:
devnull.close()
# collect any profiler data we can find
- print "Collecting any server-side profiler data lying around..."
+ logging.info("Collecting any server-side profiler data lying around...")
try:
cmd = "ls %s" % profiler.PROFILER_TMPDIR
profiler_dirs = [path for path in self.run(cmd).stdout.split()
@@ -260,7 +261,21 @@
os.mkdir(local_path)
self.get_file(remote_path + "/", local_path)
except Exception, e:
- print "crashinfo collection of profiler data failed with:\n%s" % e
+ logging.warning("Collection of profiler data failed with:\n%s", e)
+
+
+ # collect any uncollected logs we see (for this host)
+ if self.job and os.path.exists(self.job.uncollected_log_file):
+ try:
+ logs = pickle.load(open(self.job.uncollected_log_file))
+ for hostname, remote_path, local_path in logs:
+ if hostname == self.hostname:
+ logging.info("Retrieving logs from %s:%s into %s",
+ hostname, remote_path, local_path)
+ self.get_file(remote_path + "/", local_path + "/")
+ except Exception, e:
+ logging.warning("Error while trying to collect stranded "
+ "Autotest client logs: %s", e)
def are_wait_up_processes_up(self):
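
As a sketch of what the new block in get_crashinfo() assumes (how the
file gets written is outside this diff): job.uncollected_log_file is
expected to hold a pickled list of (hostname, remote_path, local_path)
tuples for client logs that could not be fetched earlier. The helper
below is a hypothetical example of producing that format, not code from
the autotest tree.

    import os, pickle

    def record_uncollected_log(log_file, hostname, remote_path, local_path):
        # Append one pending client-log entry in the format that
        # get_crashinfo() reads back with pickle.load().
        logs = []
        if os.path.exists(log_file):
            logs = pickle.load(open(log_file))
        logs.append((hostname, remote_path, local_path))
        pickle.dump(logs, open(log_file, "w"))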