lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | """ |
| 3 | Simple crash handling application for autotest |
| 4 | |
| 5 | @copyright Red Hat Inc 2009 |
| 6 | @author Lucas Meneghel Rodrigues <lmr@redhat.com> |
| 7 | """ |
lmr | 8cb0fd5 | 2010-06-08 18:00:13 +0000 | [diff] [blame^] | 8 | import sys, os, commands, glob, tempfile, shutil, syslog, re, time |
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 9 | |
| 10 | |
def get_parent_pid(pid):
    """
    Returns the parent PID for a given PID, converted to an integer.

    The value is read from /proc/<pid>/stat. The second field of that file
    (the command name) is wrapped in parentheses and may itself contain
    spaces or parentheses, so the line is only split after the last ')'.

    @param pid: Process ID.
    @return: Parent PID as an integer, or 1 if it can't be determined.
    """
    try:
        stat_file = open('/proc/%s/stat' % pid)
        try:
            stat = stat_file.read()
        finally:
            stat_file.close()
        # What follows the command name is: state, ppid, ...
        return int(stat.rsplit(')', 1)[1].split()[1])
    except (IOError, OSError, IndexError, ValueError):
        # It is not possible to determine the parent because the process
        # already left the process table (or the stat line is malformed).
        return 1
| 25 | |
| 26 | |
def write_to_file(filename, data, compress=False):
    """
    Write contents to a given file path specified. If the file does not
    exist, it will be created, otherwise it is overwritten. Optionally,
    compress the destination file with bzip2.

    @param filename: Path to a given file.
    @param data: File contents. Unicode text is encoded as UTF-8 before being
            written, byte strings are written as they are.
    @param compress: Whether the file is going to be compressed at the end of
            the data write process.
    @return: Path of the file left on disk: filename + '.bz2' if compression
            was requested and succeeded, filename otherwise.
    """
    import subprocess

    # Core dumps are binary and reports may carry non ASCII characters, so
    # always write bytes instead of relying on an implicit text encoding.
    if isinstance(data, type(u'')):
        data = data.encode('utf-8')

    f = open(filename, 'wb')
    try:
        f.write(data)
    finally:
        f.close()

    if compress:
        # bzip2 is executed directly (no shell), so file names with spaces or
        # shell metacharacters can neither break nor hijack the command. The
        # '--' keeps a name starting with '-' from being read as an option.
        try:
            bzip2 = subprocess.Popen(['bzip2', '--', filename],
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT)
            o = bzip2.communicate()[0]
            s = bzip2.returncode
        except OSError:
            # bzip2 is not installed or could not be executed.
            s, o = 1, "unable to run bzip2: %s" % sys.exc_info()[1]

        if not isinstance(o, str):
            o = o.decode('utf-8', 'replace')

        if s:
            # Compression is best effort, the uncompressed file is kept.
            syslog.syslog("File %s compression failed: %s" % (filename, o))
        else:
            filename += '.bz2'

    return filename
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 51 | |
| 52 | |
def get_results_dir_list(pid, core_dir_basename, tmp_dir="/tmp"):
    """
    Get all valid output directories for the core file and the report. It works
    by inspecting the autotest_results_dir.<pid> files created by each test on
    the temporary directory and verifying if the PID of the process that
    crashed is a child or grandchild of the autotest test process. If it can't
    find any relationship (maybe a daemon that died during a test execution),
    it will return the debug dirs of all tests currently being executed. If
    there are no active autotest tests at a particular moment, it will return
    a list with a single directory located under the temporary directory.

    @param pid: PID for the process that generated the core (integer or
            numeric string). None, or anything that is not a number, means
            that the PID is unknown.
    @param core_dir_basename: Basename for the directory that will hold both
            the core dump and the crash report.
    @param tmp_dir: Directory where the autotest_results_dir.* files are
            looked for, also used for the fallback directory.
    @return: List of directory paths, never empty.
    """
    pid_dir_dict = {}
    pattern = os.path.join(tmp_dir, "autotest_results_dir.*")
    for debugdir_file in glob.glob(pattern):
        # splitext() keeps the leading dot ('.1234'). Drop it, so the keys are
        # plain PID strings that can be matched against str(pid) below.
        a_pid = os.path.splitext(debugdir_file)[1][1:]
        try:
            f = open(debugdir_file)
            try:
                results_dir = f.read().strip()
            finally:
                f.close()
        except (IOError, OSError):
            # The file vanished or is unreadable (presumably the test that
            # owned it just finished), nothing to collect from it.
            continue
        pid_dir_dict[a_pid] = os.path.join(results_dir, core_dir_basename)

    # The PID usually arrives as a command line string, while the parent
    # lookup returns integers. Normalize it once.
    try:
        pid = int(pid)
    except (TypeError, ValueError):
        pid = None

    results_dir_list = []
    # If a bug occurs and we can't grab the PID for the process that died, just
    # return all directories available and write to all of them.
    if pid is not None:
        # Walk up the process tree until init (PID 1) is reached.
        while pid > 1:
            key = str(pid)
            if key in pid_dir_dict:
                results_dir_list.append(pid_dir_dict[key])
            pid = get_parent_pid(pid)

    return (results_dir_list or
            list(pid_dir_dict.values()) or
            [os.path.join(tmp_dir, core_dir_basename)])
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 87 | |
| 88 | |
def get_info_from_core(path):
    """
    Reads a core file and extracts a dictionary with useful core information.
    Right now, the only information extracted is the full executable name,
    taken from the "Core was generated by" line that gdb prints when it loads
    the core.

    @param path: Path to core file.
    @return: Dictionary with the key 'full_exe_path', holding the path to the
            executable that generated the core, or None if it could not be
            determined.
    """
    import subprocess

    # gdb is executed directly (no shell) and in batch mode, so it exits right
    # after loading the core instead of waiting for commands on stdin.
    try:
        gdb = subprocess.Popen(['gdb', '-n', '-batch', '-c', path],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
        output = gdb.communicate()[0]
    except OSError:
        # gdb is not installed or could not be executed.
        output = ''
    if not isinstance(output, str):
        output = output.decode('utf-8', 'ignore')

    full_exe_path = None
    # Only look at the line holding the message, text printed later by gdb
    # (that may contain quotes as well) is not part of the command line.
    path_pattern = re.compile(r"Core was generated by `([^\n]*)",
                              re.IGNORECASE)
    for line in path_pattern.findall(output):
        # Get rid of the closing quote and the period that end the message
        cmdline = re.sub(r"'\.?\s*$", "", line)
        # Sometimes the command line args come with the core, so get rid of
        # them. As the program path itself may contain spaces, try the first
        # word, then the first two words and so on, until a file is found.
        words = cmdline.split(" ")
        for end in range(1, len(words) + 1):
            candidate = " ".join(words[:end])
            if os.path.isfile(candidate):
                full_exe_path = candidate
                break
        if full_exe_path is not None:
            break

    if full_exe_path is None:
        syslog.syslog("Could not determine from which application core file %s "
                      "is from" % path)

    return {'full_exe_path': full_exe_path}
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 112 | |
| 113 | |
if __name__ == "__main__":
    # Entry point. The script is expected to receive six positional
    # parameters (crashed PID, crash time, UID, signal, hostname and
    # executable name) and the core dump itself on stdin.
    import subprocess

    syslog.openlog('AutotestCrashHandler', 0, syslog.LOG_DAEMON)

    # Everything the 'finally' clause looks at is bound up front, so an early
    # failure (mkdtemp() raising, for example) gets reported instead of being
    # masked by a NameError.
    processing_succeed = False
    core_saved = False
    core_tmp_dir = None
    core_tmp_path = None

    try:
        try:
            full_functionality = False
            try:
                (crashed_pid, crash_time, uid, signal, hostname,
                 exe) = sys.argv[1:]
                full_functionality = True
            except ValueError:
                # Probably due a kernel bug, we can't exactly map the parameters
                # passed to this script. So we have to reduce the functionality
                # of the script (just write the core at a fixed place).
                syslog.syslog("Unable to unpack parameters passed to the "
                              "script. Operating with limited functionality.")

            core_name = 'core'
            report_name = 'report'

            core_tmp_dir = tempfile.mkdtemp(prefix='core_', dir='/tmp')
            core_tmp_path = os.path.join(core_tmp_dir, core_name)
            gdb_command_path = os.path.join(core_tmp_dir, 'gdb_command')

            if full_functionality:
                core_dir_name = 'crash.%s.%s' % (exe, crashed_pid)
            else:
                crashed_pid = None
                core_dir_name = os.path.basename(core_tmp_dir)

            # Get the filtered results dir list
            # NOTE(review): this happens before stdin is drained, presumably
            # so /proc/<pid> of the crashed process is still around for the
            # parent lookup. Confirm before reordering.
            current_results_dir_list = get_results_dir_list(crashed_pid,
                                                            core_dir_name)

            # The core file is piped to this script
            core_file = sys.stdin.read()
            # Write the core file to its temporary location, let's keep it
            # there in case something goes wrong
            core_tmp_path = write_to_file(core_tmp_path, core_file)
            core_saved = True

            if not full_functionality:
                syslog.syslog("Writing core files to %s" %
                              current_results_dir_list)
                for result_dir in current_results_dir_list:
                    if not os.path.isdir(result_dir):
                        os.makedirs(result_dir)
                    core_path = os.path.join(result_dir, core_name)
                    write_to_file(core_path, core_file, compress=True)
                    processing_succeed = True
                # No report can be made without the parameters. Jump to the
                # handler below, that logs the problem.
                raise ValueError("Incorrect params passed to handler "
                                 "script: %s." % sys.argv[1:])

            # Get full command path
            exe_path = get_info_from_core(core_tmp_path)['full_exe_path']

            if exe_path is not None:
                # Write a command file for GDB
                gdb_command = 'bt full\n'
                write_to_file(gdb_command_path, gdb_command)

                # Take a backtrace from the core. gdb is executed directly
                # (no shell), as the executable path comes from the core file
                # and could carry shell metacharacters.
                gdb_cmd = ['gdb', '-e', exe_path, '-c', core_tmp_path,
                           '-x', gdb_command_path, '-n', '-batch', '-quiet']
                try:
                    gdb = subprocess.Popen(gdb_cmd, stdout=subprocess.PIPE,
                                           stderr=subprocess.STDOUT)
                    backtrace = gdb.communicate()[0]
                except OSError:
                    backtrace = ("Could not run gdb: %s" %
                                 sys.exc_info()[1])
                # Sanitize output before passing it to the report: drop the
                # invalid UTF-8 sequences, but keep a byte string, as mixing
                # unicode into the report can make formatting or writing it
                # fail on non ASCII characters.
                backtrace = backtrace.decode('utf-8', 'ignore').encode('utf-8')
                if backtrace[-1:] == '\n':
                    backtrace = backtrace[:-1]
            else:
                exe_path = "Unknown"
                backtrace = ("Could not determine backtrace for core file %s" %
                             core_tmp_path)

            # Composing the report
            report = "Program: %s\n" % exe_path
            report += "PID: %s\n" % crashed_pid
            report += "Signal: %s\n" % signal
            report += "Hostname: %s\n" % hostname
            report += "Time of the crash: %s\n" % time.ctime(float(crash_time))
            report += "Program backtrace:\n%s\n" % backtrace

            syslog.syslog("Application %s, PID %s crashed" %
                          (exe_path, crashed_pid))

            # Now, for all results dir, let's create the directory if it doesn't
            # exist, and write the core file and the report to it.
            syslog.syslog("Writing core files and reports to %s" %
                          current_results_dir_list)
            for result_dir in current_results_dir_list:
                if not os.path.isdir(result_dir):
                    os.makedirs(result_dir)
                core_path = os.path.join(result_dir, core_name)
                write_to_file(core_path, core_file, compress=True)
                report_path = os.path.join(result_dir, report_name)
                write_to_file(report_path, report)
                processing_succeed = True

        except Exception:
            syslog.syslog("Crash handler had a problem: %s" %
                          sys.exc_info()[1])

    finally:
        if processing_succeed:
            # The core reached at least one results dir, the temporary copy
            # is not needed anymore.
            if core_tmp_dir is not None and os.path.isdir(core_tmp_dir):
                shutil.rmtree(core_tmp_dir)
        elif core_saved:
            syslog.syslog("Crash handler failed to process the core file. "
                          "A copy of the file was kept at %s" %
                          core_tmp_path)
        else:
            syslog.syslog("Crash handler failed before the core file could "
                          "be saved.")