blob: 7ee9a78ff0ff7e152738d90008003c0dd0ef3899 [file] [log] [blame]
lmr95ef4f62009-09-29 17:30:43 +00001#!/usr/bin/python
2"""
3Simple crash handling application for autotest
4
5@copyright Red Hat Inc 2009
6@author Lucas Meneghel Rodrigues <lmr@redhat.com>
7"""
lmr8cb0fd52010-06-08 18:00:13 +00008import sys, os, commands, glob, tempfile, shutil, syslog, re, time
lmr95ef4f62009-09-29 17:30:43 +00009
10
def get_parent_pid(pid):
    """
    Returns the parent PID for a given PID, converted to an integer.

    The value is read from /proc/<pid>/stat. The second field of that file
    (the command name) is wrapped in parentheses and may itself contain
    spaces or parentheses, so the line is only split after the last ')'
    instead of blindly on whitespace.

    @param pid: Process ID.
    @return: Parent PID as an integer, or 1 when it can't be determined.
    """
    try:
        stat_file = open('/proc/%s/stat' % pid)
        try:
            stat = stat_file.read()
        finally:
            stat_file.close()
        # What follows the command name is: state, ppid, ...
        return int(stat[stat.rindex(')') + 1:].split()[1])
    except (IOError, OSError, ValueError, IndexError):
        # It is not possible to determine the parent because the process
        # already left the process table (or the stat line is malformed).
        return 1
25
26
def write_to_file(filename, data, compress=False):
    """
    Write contents to a given file path specified. If the file does not
    exist, it will be created. Optionally, compress the destination file.

    @param filename: Path to a given file.
    @param data: File contents.
    @param compress: Whether the file is going to be compressed (with bzip2)
            at the end of the data write process.
    @return: Path of the file left on disk: filename + '.bz2' when
            compression was requested and succeeded, filename otherwise.
    """
    # Imported locally: the module-level imports do not include subprocess.
    import subprocess

    f = open(filename, 'w')
    try:
        f.write(data)
    finally:
        f.close()

    if compress:
        # The path is handed to bzip2 as a single argument (no shell), so
        # names containing spaces or shell metacharacters are safe. '--'
        # keeps a name starting with '-' from being parsed as an option.
        try:
            bzip2 = subprocess.Popen(['bzip2', '--', filename],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT,
                                     universal_newlines=True)
            o = bzip2.communicate()[0].strip()
            s = bzip2.returncode
        except OSError:
            # bzip2 could not be executed at all; keep the uncompressed file.
            s, o = 1, 'unable to run bzip2'
        if s:
            syslog.syslog("File %s compression failed: %s" % (filename, o))
        else:
            filename += '.bz2'

    return filename
lmr95ef4f62009-09-29 17:30:43 +000051
52
def get_results_dir_list(pid, core_dir_basename):
    """
    Get all valid output directories for the core file and the report. It works
    by inspecting files created by each test on /tmp and verifying if the
    PID of the process that crashed is a child or grandchild of the autotest
    test process. If it can't find any relationship (maybe a daemon that died
    during a test execution), it will write the core file to the debug dirs
    of all tests currently being executed. If there are no active autotest
    tests at a particular moment, it will return a list with the single entry
    /tmp/<core_dir_basename>.

    @param pid: PID for the process that generated the core. May be an
            integer, a numeric string (as received from the command line) or
            None when the PID is unknown.
    @param core_dir_basename: Basename for the directory that will hold both
            the core dump and the crash report.
    @return: List of directory paths.
    """
    pid_dir_dict = {}
    for debugdir_file in glob.glob("/tmp/autotest_results_dir.*"):
        # The extension of the file name is matched against PIDs below.
        # splitext() keeps the leading dot, so strip it and convert to an
        # integer, otherwise the lookup could never succeed.
        suffix = os.path.splitext(debugdir_file)[1][1:]
        try:
            a_pid = int(suffix)
        except ValueError:
            # Not a PID: can't match any process, but the directory is still
            # kept so that it takes part in the "all directories" fallback.
            a_pid = suffix
        f = open(debugdir_file)
        try:
            results_dir = f.read().strip()
        finally:
            f.close()
        pid_dir_dict[a_pid] = os.path.join(results_dir, core_dir_basename)

    # PIDs read from the command line are strings; normalize to an integer
    # so that both the '> 1' test and the dictionary lookup behave.
    if pid is not None:
        try:
            pid = int(pid)
        except (TypeError, ValueError):
            pid = None

    results_dir_list = []
    # If a bug occurs and we can't grab the PID for the process that died, just
    # return all directories available and write to all of them.
    if pid is not None:
        # Walk up the process tree until init (PID 1) is reached.
        while pid > 1:
            if pid in pid_dir_dict:
                results_dir_list.append(pid_dir_dict[pid])
            pid = get_parent_pid(pid)

    return (results_dir_list or
            list(pid_dir_dict.values()) or
            [os.path.join("/tmp", core_dir_basename)])
lmr95ef4f62009-09-29 17:30:43 +000087
88
def get_info_from_core(path):
    """
    Reads a core file and extracts a dictionary with useful core information.
    Right now, the only information extracted is the full executable name.

    @param path: Path to core file.
    @return: Dictionary with the key 'full_exe_path', whose value is the path
            of the executable that generated the core, or None if it could
            not be determined.
    """
    # Imported locally: the module-level imports do not include subprocess.
    import subprocess

    full_exe_path = None
    try:
        # -batch makes gdb exit once the core is loaded instead of waiting
        # for commands on stdin. The argument list avoids any shell parsing
        # of the core path.
        gdb = subprocess.Popen(['gdb', '-c', path, '-batch'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
        output = gdb.communicate()[0]
    except OSError:
        # gdb is not available, nothing can be extracted.
        output = ''

    # '.' does not match newlines, so the match stays on the line that names
    # the program rather than running to the last quote of the whole output.
    path_pattern = re.compile("Core was generated by `(.+)'", re.IGNORECASE)
    for m in path_pattern.findall(output):
        # Sometimes the command line args come with the core, so get rid of them
        m = m.split(" ")[0]
        if os.path.isfile(m):
            full_exe_path = m
            break

    if full_exe_path is None:
        syslog.syslog("Could not determine from which application core file %s "
                      "is from" % path)

    return {'full_exe_path': full_exe_path}
lmr95ef4f62009-09-29 17:30:43 +0000112
113
if __name__ == "__main__":
    # Script entry point: reads a core dump from stdin, plus the crash
    # parameters from the command line, and stores the core and a crash
    # report in every results directory returned by get_results_dir_list().

    # Imported locally: the module-level imports do not include subprocess.
    import subprocess

    syslog.openlog('AutotestCrashHandler', 0, syslog.LOG_DAEMON)
    # Bound before entering the try block so that the 'finally' clause can
    # always refer to them, even when the failure happens very early.
    processing_succeed = False
    core_tmp_dir = None
    core_tmp_path = None
    try:
        try:
            full_functionality = False
            try:
                (crashed_pid, crash_time, uid, signal, hostname,
                 exe) = sys.argv[1:]
                full_functionality = True
            except ValueError:
                # Probably due to a kernel bug, we can't exactly map the
                # parameters passed to this script. So we have to reduce the
                # functionality of the script (just write the core at a
                # fixed place).
                syslog.syslog("Unable to unpack parameters passed to the "
                              "script. Operating with limited functionality.")

            core_name = 'core'
            report_name = 'report'

            core_tmp_dir = tempfile.mkdtemp(prefix='core_', dir='/tmp')
            core_tmp_path = os.path.join(core_tmp_dir, core_name)
            gdb_command_path = os.path.join(core_tmp_dir, 'gdb_command')

            if full_functionality:
                core_dir_name = 'crash.%s.%s' % (exe, crashed_pid)
            else:
                crashed_pid = None
                core_dir_name = os.path.basename(core_tmp_dir)

            # Get the filtered results dir list
            current_results_dir_list = get_results_dir_list(crashed_pid,
                                                            core_dir_name)

            # Read the core file (we are piping it to this script)
            core_file = sys.stdin.read()
            # Write the core file to its temporary location, let's keep it
            # there in case something goes wrong
            core_tmp_path = write_to_file(core_tmp_path, core_file)

            if not full_functionality:
                syslog.syslog("Writing core files to %s" %
                              current_results_dir_list)
                for result_dir in current_results_dir_list:
                    if not os.path.isdir(result_dir):
                        os.makedirs(result_dir)
                    core_path = os.path.join(result_dir, core_name)
                    core_path = write_to_file(core_path, core_file,
                                              compress=True)
                    processing_succeed = True
                # No report can be composed without the parameters; the
                # exception routes execution to the logging handler below.
                raise ValueError("Incorrect params passed to handler "
                                 "script: %s." % sys.argv[1:])

            # Get full command path
            exe_path = get_info_from_core(core_tmp_path)['full_exe_path']

            if exe_path is not None:
                # Write a command file for GDB
                gdb_command = 'bt full\n'
                write_to_file(gdb_command_path, gdb_command)

                # Take a backtrace from the core file. The executable path
                # was extracted from the core itself, so it is passed as a
                # plain argument and never goes through a shell.
                gdb_args = ['gdb', '-e', exe_path, '-c', core_tmp_path,
                            '-x', gdb_command_path, '-n', '-batch', '-quiet']
                try:
                    gdb = subprocess.Popen(gdb_args,
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True)
                    backtrace = gdb.communicate()[0]
                except OSError:
                    backtrace = ("Could not run gdb: %s" %
                                 sys.exc_info()[1])
                if backtrace[-1:] == '\n':
                    backtrace = backtrace[:-1]
                # Sanitize output before passing it to the report: drop
                # invalid UTF-8 sequences, then go back to a byte string so
                # that writing the report can't fail on non-ASCII text.
                if hasattr(backtrace, 'decode'):
                    backtrace = backtrace.decode('utf-8', 'ignore')
                    backtrace = backtrace.encode('utf-8')
            else:
                exe_path = "Unknown"
                backtrace = ("Could not determine backtrace for core file %s" %
                             core_tmp_path)

            # Composing the report
            report = "Program: %s\n" % exe_path
            report += "PID: %s\n" % crashed_pid
            report += "Signal: %s\n" % signal
            report += "Hostname: %s\n" % hostname
            report += "Time of the crash: %s\n" % time.ctime(float(crash_time))
            report += "Program backtrace:\n%s\n" % backtrace

            syslog.syslog("Application %s, PID %s crashed" %
                          (exe_path, crashed_pid))

            # Now, for all results dir, let's create the directory if it doesn't
            # exist, and write the core file and the report to it.
            syslog.syslog("Writing core files and reports to %s" %
                          current_results_dir_list)
            for result_dir in current_results_dir_list:
                if not os.path.isdir(result_dir):
                    os.makedirs(result_dir)
                core_path = os.path.join(result_dir, core_name)
                core_path = write_to_file(core_path, core_file, compress=True)
                report_path = os.path.join(result_dir, report_name)
                write_to_file(report_path, report)
                processing_succeed = True

        except Exception:
            syslog.syslog("Crash handler had a problem: %s" %
                          sys.exc_info()[1])

    finally:
        if processing_succeed:
            # The core reached at least one results directory, the temporary
            # copy is no longer needed.
            if core_tmp_dir is not None and os.path.isdir(core_tmp_dir):
                shutil.rmtree(core_tmp_dir)
        elif core_tmp_path is not None and os.path.isfile(core_tmp_path):
            syslog.syslog("Crash handler failed to process the core file. "
                          "A copy of the file was kept at %s" %
                          core_tmp_path)
        else:
            syslog.syslog("Crash handler failed before the core file could "
                          "be saved.")