blob: 6e535b887f33b8c26251bef3acb3b4d81ec1b937 [file] [log] [blame]
jadmanski4900b3b2009-07-02 22:12:08 +00001import os, time, pickle, logging, shutil
jadmanski96b78072009-05-21 22:21:04 +00002
3from autotest_lib.server import utils, profiler
4
5
# Bind the site-specific crashdump and crashinfo hooks. Both are looked up in
# the same site module; the trailing lambda is the no-op handed to
# utils.import_site_function (presumably used when the site module does not
# provide the named function -- confirm against utils).
_SITE_CRASHCOLLECT_MODULE = "autotest_lib.server.site_crashcollect"

get_site_crashdumps = utils.import_site_function(
    __file__,
    _SITE_CRASHCOLLECT_MODULE,
    "get_site_crashdumps",
    lambda host, test_start_time: None)

get_site_crashinfo = utils.import_site_function(
    __file__,
    _SITE_CRASHCOLLECT_MODULE,
    "get_site_crashinfo",
    lambda host, test_start_time: None)
13
14
def get_crashdumps(host, test_start_time):
    """Collect crashdumps for a host by delegating to the site hook.

    @param host: The RemoteHost to collect crashdumps from
    @param test_start_time: Passed through unchanged to the site hook
    """
    get_site_crashdumps(host, test_start_time)
17
18
def get_crashinfo(host, test_start_time):
    """Collect crashdumps and general crash information for a host.

    Crashdumps are always requested first. Every later step needs the
    machine to be reachable, so those steps are skipped when it does not
    come back up.

    @param host: The RemoteHost to collect crash information from
    @param test_start_time: Passed through unchanged to the site hooks
    """
    logging.info("Collecting crash information...")

    # crashdumps count as part of the general crashinfo
    get_crashdumps(host, test_start_time)

    # nothing further can be gathered from a machine that stays down
    if not wait_for_machine_to_recover(host):
        return

    # site-specific collection runs before the generic steps
    get_site_crashinfo(host, test_start_time)

    target_dir = get_crashinfo_dir(host)
    dmesg_path = os.path.join(target_dir, "dmesg")

    collect_messages(host)
    collect_log_file(host, "/var/log/monitor-ssh-reboots", target_dir)
    collect_command(host, "dmesg", dmesg_path)
    collect_uncollected_logs(host)
34
35
def wait_for_machine_to_recover(host, hours_to_wait=4.0):
    """Wait for a machine (possibly down) to become accessible again.

    @param host: A RemoteHost instance to wait on
    @param hours_to_wait: Number of hours to wait before giving up

    @returns: True if the machine comes back up, False otherwise
    """
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    # log the wait that was actually requested; the timeout below is derived
    # from hours_to_wait, so a fixed "four hours" would be wrong for any
    # caller that overrides the default
    logging.info("Waiting %s hours for %s to come up (%s)",
                 hours_to_wait, host.hostname, current_time)

    # wait_up takes its timeout in seconds
    if not host.wait_up(timeout=hours_to_wait * 3600):
        logging.warning("%s down, unable to collect crash info",
                        host.hostname)
        return False

    logging.info("%s is back up, collecting crash info", host.hostname)
    return True
jadmanski96b78072009-05-21 22:21:04 +000054
jadmanski96b78072009-05-21 22:21:04 +000055
def get_crashinfo_dir(host):
    """Return the local crashinfo directory for a host, creating it if absent.

    The directory is named "crashinfo.<hostname>". It is placed under the
    resultdir of the host's job when the host has a job with a non-empty
    resultdir, and under the current working directory otherwise.

    @param host: The RemoteHost object that crashinfo will be collected from

    @returns: The path to an existing directory for writing crashinfo into
    """
    job = getattr(host, "job", None)
    # either attribute may be missing or empty; fall back to the cwd then
    base_dir = getattr(job, "resultdir", None) or os.path.abspath(os.getcwd())

    crashinfo_dir = os.path.join(base_dir, "crashinfo.%s" % host.hostname)
    if not os.path.exists(crashinfo_dir):
        os.mkdir(crashinfo_dir)
    return crashinfo_dir
jadmanski96b78072009-05-21 22:21:04 +000072
jadmanski96b78072009-05-21 22:21:04 +000073
def collect_log_file(host, log_path, dest_path):
    """Collects a log file from the remote machine.

    Log files are collected from the remote machine and written into the
    destination path. If dest_path is a directory, the log file will be named
    using the basename of the remote log path.

    Collection is best-effort: any error raised while fetching the file is
    logged as a warning, together with its reason, and not propagated.

    @param host: The RemoteHost to collect logs from
    @param log_path: The remote path to collect the log file from
    @param dest_path: A path (file or directory) to write the copied log into
    """
    logging.info("Collecting %s...", log_path)
    try:
        host.get_file(log_path, dest_path, preserve_perm=False)
    except Exception as e:
        # a missing or unreadable log must not abort the rest of the crash
        # collection, but keep the reason so the failure can be diagnosed
        logging.warning("Collection of %s failed:\n%s", log_path, e)
90
91
92
def collect_command(host, command, dest_path):
    """Collects the result of a command on the remote machine.

    The standard output of the command is captured and written into the
    destination path, which is assumed to be a filename and not a
    directory. Any error raised while running the command or writing the
    result is logged as a warning and not propagated.

    @param host: The RemoteHost to collect from
    @param command: A shell command to run on the remote machine and capture
            the output from.
    @param dest_path: A file path to write the output of the command into
    """
    logging.info("Collecting '%s' ...", command)
    # stdout is tee'd into /dev/null; the text that gets saved comes from
    # the .stdout attribute of the run result
    with open("/dev/null", "w") as sink:
        try:
            output = host.run(command, stdout_tee=sink).stdout
            utils.open_write_close(dest_path, output)
        except Exception as err:
            logging.warning("Collection of '%s' failed:\n%s", command, err)
115
jadmanski663d55a2009-05-21 22:54:28 +0000116
def collect_uncollected_logs(host):
    """Collects any leftover uncollected logs from the client.

    Reads the list of (hostname, remote_path, local_path) entries pickled in
    the job's uncollected_log_file and fetches every entry that belongs to
    this host. Nothing is done when the host has no job, the job has no
    uncollected log file configured, or that file does not exist. The job
    object is not modified.

    Errors raised while reading the list or fetching logs are logged as a
    warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    # look the attributes up defensively: a host without a job (or a job
    # without a log file path) simply has nothing to collect
    job = getattr(host, "job", None)
    log_list_path = getattr(job, "uncollected_log_file", None)
    if not log_list_path or not os.path.exists(log_list_path):
        return

    try:
        # NOTE(review): unpickling runs arbitrary code if the file is
        # attacker-controlled; this assumes the file was written locally by
        # the job itself -- confirm it never comes from an untrusted source
        with open(log_list_path, "rb") as log_list:
            logs = pickle.load(log_list)
        for hostname, remote_path, local_path in logs:
            if hostname == host.hostname:
                logging.info("Retrieving logs from %s:%s into %s",
                             hostname, remote_path, local_path)
                host.get_file(remote_path + "/", local_path + "/")
    except Exception as e:
        logging.warning("Error while trying to collect stranded "
                        "Autotest client logs: %s", e)
jadmanski4900b3b2009-07-02 22:12:08 +0000136
137
def collect_messages(host):
    """Collects the 'new' contents of /var/log/messages.

    If host.VAR_LOG_MESSAGES_COPY_PATH is on the remote machine, collects
    the contents of /var/log/messages excluding whatever initial contents
    are already present in host.VAR_LOG_MESSAGES_COPY_PATH. If it is not
    present, simply collects the entire contents of /var/log/messages.

    The result is written to "messages" inside the host's crashinfo
    directory. Errors raised while fetching or trimming the log are logged
    as a warning and not propagated.

    @param host: The RemoteHost to collect from
    """
    crashinfo_dir = get_crashinfo_dir(host)

    try:
        # paths to the messages files
        messages = os.path.join(crashinfo_dir, "messages")
        messages_raw = os.path.join(crashinfo_dir, "messages.raw")
        messages_at_start = os.path.join(crashinfo_dir, "messages.at_start")

        # grab the files from the remote host
        collect_log_file(host, host.VAR_LOG_MESSAGES_COPY_PATH,
                         messages_at_start)
        collect_log_file(host, "/var/log/messages", messages_raw)

        # figure out how much of messages.raw to skip
        if os.path.exists(messages_at_start):
            # the first line of the messages at start should match the
            # first line of the current messages; if it doesn't then
            # messages has been erased or rotated and we just grab all of it
            first_line_at_start = utils.read_one_line(messages_at_start)
            first_line_now = utils.read_one_line(messages_raw)
            if first_line_at_start != first_line_now:
                size_at_start = 0
            else:
                size_at_start = os.path.getsize(messages_at_start)
        else:
            size_at_start = 0

        # size_at_start is a byte count, so seek and copy in binary mode;
        # the with-blocks close both files even if the copy fails
        with open(messages_raw, "rb") as raw_messages_file:
            with open(messages, "wb") as messages_file:
                raw_messages_file.seek(size_at_start)
                shutil.copyfileobj(raw_messages_file, messages_file)

        # get rid of the "raw" versions of messages
        os.remove(messages_raw)
        if os.path.exists(messages_at_start):
            os.remove(messages_at_start)
    except Exception as e:
        logging.warning("Error while collecting /var/log/messages: %s", e)