blob: 1f28861dcdf60f57fd181453cb4125790eb1a758 [file] [log] [blame]
jadmanski4900b3b2009-07-02 22:12:08 +00001import os, time, pickle, logging, shutil
jadmanski96b78072009-05-21 22:21:04 +00002
mbligh15971eb2009-12-29 02:55:23 +00003from autotest_lib.server import utils
jadmanski96b78072009-05-21 22:21:04 +00004
5
# Site hooks for crashdump and crashinfo collection.  Both names are resolved
# through utils.import_site_function from the module
# autotest_lib.server.site_crashcollect.  The trailing lambda is a do-nothing
# hook taking the same (host, test_start_time) arguments -- presumably the
# fallback used when no site implementation exists; confirm against
# utils.import_site_function.
get_site_crashdumps = utils.import_site_function(
    __file__,
    "autotest_lib.server.site_crashcollect",
    "get_site_crashdumps",
    lambda host, test_start_time: None)
get_site_crashinfo = utils.import_site_function(
    __file__,
    "autotest_lib.server.site_crashcollect",
    "get_site_crashinfo",
    lambda host, test_start_time: None)
13
14
def get_crashdumps(host, test_start_time):
    """Collect crashdumps by delegating to the site-specific hook.

    @param host: The host object handed through to get_site_crashdumps
    @param test_start_time: Handed through unchanged to get_site_crashdumps
    """
    get_site_crashdumps(host, test_start_time)
17
18
def get_crashinfo(host, test_start_time):
    """Collect crashdumps and, if the host is reachable, general crash info.

    Crashdumps are always requested first.  Everything else (site-specific
    crashinfo, /var/log/messages, the ssh reboot monitor log, dmesg output
    and any stranded client logs) is only gathered once
    wait_for_machine_to_recover() reports the host as up.

    @param host: The host object to collect crash information from
    @param test_start_time: Passed through to the crashdump/crashinfo hooks
    """
    logging.info("Collecting crash information...")

    # crashdumps count as part of the general crashinfo
    get_crashdumps(host, test_start_time)

    # nothing more can be gathered from a machine that never came back
    if not wait_for_machine_to_recover(host):
        return

    # site-specific collection runs before the generic collectors
    get_site_crashinfo(host, test_start_time)

    info_dir = get_crashinfo_dir(host)
    dmesg_path = os.path.join(info_dir, "dmesg")
    collect_messages(host)
    collect_log_file(host, "/var/log/monitor-ssh-reboots", info_dir)
    collect_command(host, "dmesg", dmesg_path)
    collect_uncollected_logs(host)
34
35
def wait_for_machine_to_recover(host, hours_to_wait=4.0):
    """Wait for a machine (possibly down) to become accessible again.

    @param host: A RemoteHost instance to wait on
    @param hours_to_wait: Number of hours to wait before giving up

    @returns: True if the machine comes back up, False otherwise
    """
    if host.is_up():
        logging.info("%s already up, collecting crash info", host.hostname)
        return True

    # The timestamp is only needed for the "waiting" message, so it is taken
    # here, right before the wait actually starts.
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    # Report the real timeout rather than a hard-coded "four hours".
    logging.info("Waiting %s hours for %s to come up (%s)",
                 hours_to_wait, host.hostname, current_time)
    if not host.wait_up(timeout=hours_to_wait * 3600):
        logging.warning("%s down, unable to collect crash info",
                        host.hostname)
        return False

    logging.info("%s is back up, collecting crash info", host.hostname)
    return True
jadmanski96b78072009-05-21 22:21:04 +000058
jadmanski96b78072009-05-21 22:21:04 +000059
def get_crashinfo_dir(host):
    """Find and if necessary create a directory to store crashinfo in.

    The directory is named "crashinfo.<hostname>" and lives under the
    resultdir of the host's job when one is available, otherwise under the
    current working directory.  Missing parent directories are created too.

    @param host: The RemoteHost object that crashinfo will be collected from

    @returns: The path to an existing directory for writing crashinfo into

    @raises OSError: If the directory cannot be created and does not already
            exist as a directory
    """
    job = getattr(host, "job", None)
    base_dir = getattr(job, "resultdir", None)
    if not base_dir:
        base_dir = os.path.abspath(os.getcwd())
    infodir = os.path.join(base_dir, "crashinfo.%s" % host.hostname)

    # Create unconditionally and tolerate "already exists": checking with
    # os.path.exists() first leaves a window in which a concurrent collector
    # can create the directory and make a plain mkdir() fail.
    try:
        os.makedirs(infodir)
    except OSError:
        if not os.path.isdir(infodir):
            raise
    return infodir
jadmanski96b78072009-05-21 22:21:04 +000076
jadmanski96b78072009-05-21 22:21:04 +000077
def collect_log_file(host, log_path, dest_path):
    """Collects a log file from the remote machine.

    Log files are collected from the remote machine and written into the
    destination path. If dest_path is a directory, the log file will be named
    using the basename of the remote log path.

    Collection is best-effort: any failure is logged as a warning (including
    the error that caused it) and is not propagated to the caller.

    @param host: The RemoteHost to collect logs from
    @param log_path: The remote path to collect the log file from
    @param dest_path: A path (file or directory) to write the copied logs into
    """
    logging.info("Collecting %s...", log_path)
    try:
        host.get_file(log_path, dest_path, preserve_perm=False)
    except Exception as e:
        # Include the cause, like the other collectors in this module do.
        logging.warning("Collection of %s failed: %s", log_path, e)
94
95
96
def collect_command(host, command, dest_path):
    """Run a command on the remote machine and save its standard output.

    The output is written to dest_path, which is treated as a file name
    rather than a directory.  Failures while running the command or writing
    the file are logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    @param command: A shell command to run on the remote machine and capture
            the output from.
    @param dest_path: A file path to write the command output into
    """
    logging.info("Collecting '%s' ...", command)
    # stdout is tee'd into /dev/null; the captured text comes from the
    # result object's .stdout attribute instead
    with open("/dev/null", "w") as sink:
        try:
            output = host.run(command, stdout_tee=sink).stdout
            utils.open_write_close(dest_path, output)
        except Exception as err:
            logging.warning("Collection of '%s' failed:\n%s", command, err)
119
jadmanski663d55a2009-05-21 22:54:28 +0000120
def collect_uncollected_logs(host):
    """Collects any leftover uncollected logs from the client.

    Does nothing when the host has no job attached.  Otherwise every entry
    returned by the job's get_client_logs() that belongs to this host is
    copied from its remote path to its local path.  Errors are logged as a
    warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    # get_crashinfo_dir() treats the job attribute as optional, so do the
    # same here instead of failing with AttributeError outside the try block.
    job = getattr(host, "job", None)
    if not job:
        return

    try:
        for hostname, remote_path, local_path in job.get_client_logs():
            if hostname != host.hostname:
                continue
            logging.info("Retrieving logs from %s:%s into %s",
                         hostname, remote_path, local_path)
            host.get_file(remote_path + "/", local_path + "/")
    except Exception as e:
        logging.warning("Error while trying to collect stranded "
                        "Autotest client logs: %s", e)
jadmanski4900b3b2009-07-02 22:12:08 +0000137
138
def collect_messages(host):
    """Collects the 'new' contents of /var/log/messages.

    If host.VAR_LOG_MESSAGES_COPY_PATH is on the remote machine, collects
    the contents of /var/log/messages excluding whatever initial contents
    are already present in host.VAR_LOG_MESSAGES_COPY_PATH. If it is not
    present, simply collects the entire contents of /var/log/messages.

    The result is written to "messages" inside the crashinfo directory.  The
    intermediate "messages.raw" and "messages.at_start" copies are removed
    again whether or not collection succeeds.  Errors during collection are
    logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    crashinfo_dir = get_crashinfo_dir(host)

    # paths to the messages files
    messages = os.path.join(crashinfo_dir, "messages")
    messages_raw = os.path.join(crashinfo_dir, "messages.raw")
    messages_at_start = os.path.join(crashinfo_dir, "messages.at_start")

    try:
        try:
            # grab the files from the remote host
            collect_log_file(host, host.VAR_LOG_MESSAGES_COPY_PATH,
                             messages_at_start)
            collect_log_file(host, "/var/log/messages", messages_raw)

            # figure out how much of messages.raw to skip
            size_at_start = 0
            if os.path.exists(messages_at_start):
                # the first line of the messages at start should match the
                # first line of the current messages; if it doesn't then
                # messages has been erased or rotated and we grab all of it
                first_line_at_start = utils.read_one_line(messages_at_start)
                first_line_now = utils.read_one_line(messages_raw)
                if first_line_at_start == first_line_now:
                    size_at_start = os.path.getsize(messages_at_start)

            # size_at_start is a byte count, so seek and copy in binary mode;
            # the context managers close both files even if the copy fails
            with open(messages_raw, "rb") as raw_messages_file:
                with open(messages, "wb") as messages_file:
                    raw_messages_file.seek(size_at_start)
                    shutil.copyfileobj(raw_messages_file, messages_file)
        finally:
            # get rid of the "raw" versions of messages, also after a failure
            for leftover in (messages_raw, messages_at_start):
                if os.path.exists(leftover):
                    os.remove(leftover)
    except Exception as e:
        logging.warning("Error while collecting /var/log/messages: %s", e)