blob: 8dbbb723e5061521923ae9508f7fa3b34c73ed34 [file] [log] [blame]
jadmanski96b78072009-05-21 22:21:04 +00001import os, time, pickle, logging
2
3from autotest_lib.server import utils, profiler
4
5
# Import any site hooks for the crashdump and crashinfo collection.
# Both hooks are looked up by name in autotest_lib.server.site_crashcollect.
# The trailing lambda is a no-op taking (host, test_start_time); presumably
# it is the fallback used when the site module does not define the hook --
# confirm against utils.import_site_function.
get_site_crashdumps = utils.import_site_function(
    __file__, "autotest_lib.server.site_crashcollect", "get_site_crashdumps",
    lambda host, test_start_time: None)
get_site_crashinfo = utils.import_site_function(
    __file__, "autotest_lib.server.site_crashcollect", "get_site_crashinfo",
    lambda host, test_start_time: None)
13
14
def get_crashdumps(host, test_start_time):
    """Collect crashdumps for a host by delegating to the site hook.

    This function does nothing beyond calling get_site_crashdumps with the
    same arguments; its return value is discarded.

    @param host: The host object handed through to the site hook
    @param test_start_time: Handed through unchanged to the site hook
    """
    get_site_crashdumps(host, test_start_time)
17
18
def get_crashinfo(host, test_start_time):
    """Gather crash information for a host.

    Crashdumps are collected first. If the machine then becomes reachable,
    the site-specific crashinfo hook is run and a fixed set of logs, the
    output of dmesg, leftover profiler data and any uncollected client logs
    are gathered as well. If the machine does not come back, nothing
    further is collected.

    @param host: The RemoteHost to collect crash information from
    @param test_start_time: Passed through to the crashdump and site
            crashinfo hooks
    """
    logging.info("Collecting crash information...")

    # crashdumps are treated as part of the general crashinfo
    get_crashdumps(host, test_start_time)

    # everything after this point needs to talk to the machine
    if not wait_for_machine_to_recover(host):
        return

    # run any site-specific collection
    get_site_crashinfo(host, test_start_time)

    target_dir = get_crashinfo_dir(host)
    for remote_log in ("/var/log/messages", "/var/log/monitor-ssh-reboots"):
        collect_log_file(host, remote_log, target_dir)
    dmesg_file = os.path.join(target_dir, "dmesg")
    collect_command(host, "dmesg", dmesg_file)
    collect_profiler_data(host, target_dir)
    collect_uncollected_logs(host)
35
36
def wait_for_machine_to_recover(host, hours_to_wait=4.0):
    """Wait for a machine (possibly down) to become accessible again.

    @param host: A RemoteHost instance to wait on
    @param hours_to_wait: Number of hours to wait before giving up

    @returns: True if the machine comes back up, False otherwise
    """
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    # Log the wait that was actually requested; the timeout is a parameter,
    # so a hard-coded "four hours" would be wrong for any non-default call.
    logging.info("Waiting %g hours for %s to come up (%s)",
                 hours_to_wait, host.hostname, current_time)
    # host.wait_up takes its timeout in seconds
    if not host.wait_up(timeout=hours_to_wait * 3600):
        logging.warning("%s down, unable to collect crash info",
                        host.hostname)
        return False
    logging.info("%s is back up, collecting crash info", host.hostname)
    return True
jadmanski96b78072009-05-21 22:21:04 +000055
jadmanski96b78072009-05-21 22:21:04 +000056
def get_crashinfo_dir(host):
    """Find and if necessary create a directory to store crashinfo in.

    The directory is named "crashinfo.<hostname>" and lives under the
    result directory of the host's job when one is set, otherwise under
    the current working directory.

    @param host: The RemoteHost object that crashinfo will be collected from

    @returns: The path to an existing directory for writing crashinfo into

    @raises OSError: If the directory cannot be created, including when the
            path already exists but is not a directory
    """
    host_resultdir = getattr(getattr(host, "job", None), "resultdir", None)
    if host_resultdir:
        infodir = host_resultdir
    else:
        infodir = os.path.abspath(os.getcwd())
    infodir = os.path.join(infodir, "crashinfo.%s" % host.hostname)
    # Create first and inspect afterwards: this avoids the window between an
    # exists() check and mkdir(), and also creates a missing parent directory.
    try:
        os.makedirs(infodir)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(infodir):
            raise
    return infodir
jadmanski96b78072009-05-21 22:21:04 +000073
jadmanski96b78072009-05-21 22:21:04 +000074
def collect_log_file(host, log_path, dest_path):
    """Collects a log file from the remote machine.

    Log files are collected from the remote machine and written into the
    destination path. If dest_path is a directory, the log file will be named
    using the basename of the remote log path.

    Collection is best-effort: any failure is logged as a warning, together
    with the reason, and is not propagated to the caller.

    @param host: The RemoteHost to collect logs from
    @param log_path: The remote path to collect the log file from
    @param dest_path: A path (file or directory) to write the copied logs into
    """
    logging.info("Collecting %s...", log_path)
    try:
        host.get_file(log_path, dest_path, preserve_perm=False)
    except Exception as e:
        # Include the cause, as the other collectors in this module do, so a
        # failed collection can be diagnosed from the log alone.
        logging.warning("Collection of %s failed:\n%s", log_path, e)
91
92
93
def collect_command(host, command, dest_path):
    """Run a command on the remote machine and save its standard output.

    The command's stdout is written to the destination path, which is
    treated as a file name rather than a directory. Any failure while
    running the command or writing the result is logged as a warning and
    not propagated.

    @param host: The RemoteHost to collect from
    @param command: A shell command to run on the remote machine and capture
            the output from.
    @param dest_path: A file path to write the command output into
    """
    logging.info("Collecting '%s' ...", command)
    # handed to host.run as its stdout tee
    sink = open("/dev/null", "w")
    try:
        output = host.run(command, stdout_tee=sink).stdout
        utils.open_write_close(dest_path, output)
    except Exception as e:
        logging.warning("Collection of '%s' failed:\n%s", command, e)
    finally:
        # always release the tee, whether or not collection worked
        sink.close()
116
jadmanski663d55a2009-05-21 22:54:28 +0000117
def collect_profiler_data(host, dest_path):
    """Collects any leftover profiler data that can be found.

    Any profiler data found will be written into a subdirectory of the
    crashinfo path called "profiler.$REMOTEDIR" where $REMOTEDIR is the
    basename of the remote profiler data path.

    Collection is best-effort: any failure is logged as a warning and not
    propagated to the caller.

    @param host: The RemoteHost to collect from
    @param dest_path: A directory to copy the profiler results into
    """
    logging.info("Collecting any server-side profiler data lying around...")
    try:
        cmd = "ls %s" % profiler.PROFILER_TMPDIR
        # only entries named "autoserv-*" are considered
        profiler_dirs = [path for path in host.run(cmd).stdout.split()
                         if path.startswith("autoserv-")]
        for profiler_dir in profiler_dirs:
            remote_path = profiler.get_profiler_results_dir(profiler_dir)
            # "ls" doubles as an existence test for the remote results dir
            remote_exists = host.run("ls %s" % remote_path,
                                     ignore_status=True).exit_status == 0
            if not remote_exists:
                continue
            local_path = os.path.join(dest_path, "profiler." + profiler_dir)
            # Reuse a directory left behind by an earlier collection instead
            # of letting mkdir raise and abort the remaining profiler dirs.
            if not os.path.isdir(local_path):
                os.mkdir(local_path)
            host.get_file(remote_path + "/", local_path)
    except Exception as e:
        logging.warning("Collection of profiler data failed with:\n%s", e)
144
145
def collect_uncollected_logs(host):
    """Collects any leftover uncollected logs from the client.

    The job attached to the host may name a pickle file holding
    (hostname, remote_path, local_path) entries. Every entry whose hostname
    matches this host is copied from the remote path to the local path.
    Hosts without a job, and jobs without such a file, are silently skipped.
    Errors while reading the file or copying logs are logged as a warning
    and not propagated.

    @param host: The RemoteHost to collect from
    """
    # Check for the job before touching its attributes; a host with no job
    # would otherwise fail on the attribute access below.
    job = getattr(host, "job", None)
    if not job:
        return

    # A missing path is normalised to '' on the job, as before.
    if not job.uncollected_log_file:
        job.uncollected_log_file = ''

    if not os.path.exists(job.uncollected_log_file):
        return

    try:
        # NOTE(review): pickle.load executes whatever the file describes;
        # this is only safe while the file is written by the job itself.
        log_file = open(job.uncollected_log_file, "rb")
        try:
            logs = pickle.load(log_file)
        finally:
            # do not leak the handle, even when unpickling fails
            log_file.close()
        for hostname, remote_path, local_path in logs:
            if hostname == host.hostname:
                logging.info("Retrieving logs from %s:%s into %s",
                             hostname, remote_path, local_path)
                host.get_file(remote_path + "/", local_path + "/")
    except Exception as e:
        logging.warning("Error while trying to collect stranded "
                        "Autotest client logs: %s", e)
jadmanski663d55a2009-05-21 22:54:28 +0000164 "Autotest client logs: %s", e)