lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | """ |
| 3 | Simple crash handling application for autotest |
| 4 | |
| 5 | @copyright Red Hat Inc 2009 |
| 6 | @author Lucas Meneghel Rodrigues <lmr@redhat.com> |
| 7 | """ |
| 8 | import sys, os, commands, glob, tempfile, shutil, syslog |
| 9 | |
| 10 | |
def get_parent_pid(pid):
    """
    Returns the parent PID for a given PID, converted to an integer.

    The value is read from /proc/<pid>/stat. If that file can not be read or
    parsed (typically because the process already left the process table),
    1 is returned.

    @param pid: Process ID.
    @return: Parent PID as an integer, or 1 if it can not be determined.
    """
    try:
        stat_file = open('/proc/%s/stat' % pid)
        try:
            stat = stat_file.read()
        finally:
            stat_file.close()
        # The second field is the command name wrapped in parentheses, and it
        # may itself contain spaces or parentheses. Parsing only what follows
        # the last ')' keeps the remaining fields aligned: state, then ppid.
        ppid = int(stat[stat.rindex(')') + 1:].split()[1])
    except (IOError, OSError, ValueError, IndexError):
        # It is not possible to determine the parent because the process
        # already left the process table (or the stat line is malformed).
        ppid = 1

    return ppid
| 25 | |
| 26 | |
def write_to_file(file_path, contents):
    """
    Write contents to a given file path specified. If the file does not exist
    it will be created; if it exists, it is truncated first.

    @param file_path: Path to a given file.
    @param contents: File contents.
    """
    file_object = open(file_path, 'w')
    try:
        file_object.write(contents)
    finally:
        # Close the handle even when write() fails (e.g. disk full while
        # dumping a large core), so the descriptor is not leaked.
        file_object.close()
| 38 | |
| 39 | |
def get_results_dir_list(pid, core_dir_basename):
    """
    Get all valid output directories for the core file and the report. It works
    by inspecting files created by each test on /tmp and verifying if the
    PID of the process that crashed is a child or grandchild of the autotest
    test process. If it can't find any relationship (maybe a daemon that died
    during a test execution), it will write the core file to the debug dirs
    of all tests currently being executed. If there are no active autotest
    tests at a particular moment, it will return a list with the path
    /tmp/<core_dir_basename>.

    @param pid: PID for the process that generated the core (integer or
            numeric string), or None if it could not be determined.
    @param core_dir_basename: Basename for the directory that will hold both
            the core dump and the crash report.
    @return: Non-empty list of directory paths.
    """
    pid_dir_dict = {}
    for debugdir_file in glob.glob("/tmp/autotest_results_dir.*"):
        # splitext() returns the extension *with* its leading dot; drop the
        # dot so the key is the bare PID text and can actually be matched.
        a_pid = os.path.splitext(debugdir_file)[1].lstrip('.')
        try:
            debugdir = open(debugdir_file)
            try:
                results_dir = debugdir.read().strip()
            finally:
                debugdir.close()
        except (IOError, OSError):
            # The file may vanish between glob() and open() (e.g. the test
            # just finished); ignore it rather than lose the core.
            continue
        pid_dir_dict[a_pid] = os.path.join(results_dir, core_dir_basename)

    all_dirs = list(pid_dir_dict.values())

    # The PID arrives as a string from the command line; normalize it to an
    # integer so the comparison below is numeric. If a bug occurs and we can't
    # grab a usable PID for the process that died, fall through and return all
    # directories available so the core is written to all of them.
    try:
        current_pid = int(pid)
    except (TypeError, ValueError):
        current_pid = None

    results_dir_list = []
    if current_pid is not None:
        # Walk up the process tree collecting the results dir of every
        # ancestor (or the process itself) that registered one.
        while current_pid > 1:
            key = str(current_pid)
            if key in pid_dir_dict:
                results_dir_list.append(pid_dir_dict[key])
            current_pid = get_parent_pid(current_pid)

    return (results_dir_list or
            all_dirs or
            [os.path.join("/tmp", core_dir_basename)])
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 74 | |
| 75 | |
def _first_core_string(path, prefix):
    """
    Returns what follows prefix on the first line printed by the 'strings'
    utility for the given file that starts with prefix.

    @param path: Path to the file to scan.
    @param prefix: Text the wanted line must start with (e.g. "PWD=").
    @return: Remainder of the first matching line, or '' if no line matches
            or the 'strings' utility could not be executed.
    """
    import subprocess

    try:
        # Argument list instead of a shell string: the path is never
        # interpreted by a shell.
        proc = subprocess.Popen(['strings', path], stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        return ''

    value = ''
    try:
        for line in proc.stdout:
            if line.startswith(prefix):
                # Remove exactly the prefix. str.strip(prefix) would instead
                # remove any of the prefix *characters* from both ends.
                value = line[len(prefix):].rstrip('\n')
                break
    finally:
        # Closing the pipe lets 'strings' terminate early once we stop
        # reading; wait() reaps the child.
        proc.stdout.close()
        proc.wait()
    return value


def get_info_from_core(path):
    """
    Reads a core file and extracts a dictionary with useful core information.
    Right now, the only information extracted is the full executable name.

    The executable name is taken from the "_=" environment entry found in the
    core; when it is relative ("./..."), it is resolved against the "PWD="
    entry.

    @param path: Path to core file.
    @return: Dictionary with the keys 'core_file' (the path given) and
            'full_exe_path' ('' if it could not be determined).
    """
    # Here we are getting the executable full path in a very inelegant way :(
    # Since the 'right' solution for it is to make a library to get information
    # from core dump files, properly written, I'll leave this as it is for now.
    full_exe_path = _first_core_string(path, "_=")
    if full_exe_path.startswith("./"):
        pwd = _first_core_string(path, "PWD=")
        # Drop only the leading "./" (and any redundant slashes after it) so
        # names such as "./.hidden/tool." are preserved intact.
        full_exe_path = os.path.join(pwd, full_exe_path[2:].lstrip("/"))

    return {'core_file': path, 'full_exe_path': full_exe_path}
| 94 | |
| 95 | |
if __name__ == "__main__":
    # Script entry point. It takes six positional arguments (crashed PID,
    # time, UID, signal, hostname, executable name) and reads the core dump
    # from standard input. The core and a crash report are then written to
    # every directory returned by get_results_dir_list().

    # Only this entry point needs subprocess, so it is imported here.
    import subprocess

    syslog.openlog('AutotestCrashHandler', 0, syslog.LOG_DAEMON)
    # Bound before the try block so the cleanup in 'finally' never hits an
    # unbound name when something fails before the temporary dir is created.
    core_tmp_dir = None
    try:
        try:
            full_functionality = False
            try:
                (crashed_pid, time, uid, signal, hostname, exe) = sys.argv[1:]
                full_functionality = True
            except ValueError:
                # Probably due a kernel bug, we can't exactly map the parameters
                # passed to this script. So we have to reduce the functionality
                # of the script (just write the core at a fixed place).
                syslog.syslog("Unable to unpack parameters passed to the "
                              "script. Operating with limited functionality.")

            core_name = 'core'
            report_name = 'report'

            core_tmp_dir = tempfile.mkdtemp(prefix='core_', dir='/tmp')
            core_tmp_path = os.path.join(core_tmp_dir, core_name)
            gdb_command_path = os.path.join(core_tmp_dir, 'gdb_command')

            if full_functionality:
                core_dir_name = 'crash.%s.%s' % (exe, crashed_pid)
            else:
                crashed_pid = None
                core_dir_name = os.path.basename(core_tmp_dir)

            # Get the filtered results dir list
            current_results_dir_list = get_results_dir_list(crashed_pid,
                                                            core_dir_name)

            # The core file is piped to this script on standard input
            core_file = sys.stdin.read()
            # Write the core file to its temporary location
            write_to_file(core_tmp_path, core_file)

            if not full_functionality:
                syslog.syslog(syslog.LOG_INFO, "Writing core files to %s" %
                              current_results_dir_list)
                for result_dir in current_results_dir_list:
                    if not os.path.isdir(result_dir):
                        os.makedirs(result_dir)
                    core_path = os.path.join(result_dir, core_name)
                    write_to_file(core_path, core_file)
                # Raised on purpose: the handler below logs it, and the report
                # generation (which needs the missing parameters) is skipped.
                raise ValueError("Incorrect params passed to handler "
                                 "script: %s." % sys.argv[1:])

            # Write a command file for GDB
            gdb_command = 'bt full\n'
            write_to_file(gdb_command_path, gdb_command)

            # Get full command path
            exe_path = get_info_from_core(core_tmp_path)['full_exe_path']

            # Take a backtrace from the core. exe_path is extracted from the
            # memory of the crashed process, so it must never reach a shell:
            # gdb is run with an argument list instead of a command string.
            gdb_args = ['gdb']
            if exe_path:
                gdb_args.extend(['-e', exe_path])
            gdb_args.extend(['-c', core_tmp_path, '-x', gdb_command_path,
                             '-n', '-batch', '-quiet'])
            try:
                gdb_process = subprocess.Popen(gdb_args,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.STDOUT)
                backtrace = gdb_process.communicate()[0]
            except OSError:
                # gdb could not be executed; still deliver core and report.
                backtrace = "Unable to run gdb: %s" % sys.exc_info()[1]
            # Sanitize output before passing it to the report: drop invalid
            # byte sequences, then encode back to a byte string so writing
            # the report can not fail on non-ASCII characters.
            backtrace = backtrace.decode('utf-8', 'ignore')
            backtrace = backtrace.rstrip('\n').encode('utf-8')

            # Composing the format_dict
            format_dict = {}
            format_dict['program'] = exe_path
            format_dict['pid'] = crashed_pid
            format_dict['signal'] = signal
            format_dict['hostname'] = hostname
            format_dict['time'] = time
            format_dict['backtrace'] = backtrace

            report = """Autotest crash report

Program: %(program)s
PID: %(pid)s
Signal: %(signal)s
Hostname: %(hostname)s
Time of the crash: %(time)s
Program backtrace:
%(backtrace)s
""" % format_dict

            syslog.syslog(syslog.LOG_INFO,
                          "Application %s, PID %s crashed" %
                          (exe_path, crashed_pid))

            # Now, for all results dir, let's create the directory if it doesn't
            # exist, and write the core file and the report to it.
            syslog.syslog(syslog.LOG_INFO,
                          "Writing core files and reports to %s" %
                          current_results_dir_list)
            for result_dir in current_results_dir_list:
                if not os.path.isdir(result_dir):
                    os.makedirs(result_dir)
                core_path = os.path.join(result_dir, core_name)
                write_to_file(core_path, core_file)
                report_path = os.path.join(result_dir, report_name)
                write_to_file(report_path, report)

        except Exception:
            # A crash handler must never die noisily: log the problem and
            # carry on to the cleanup below.
            e = sys.exc_info()[1]
            syslog.syslog("Crash handler had a problem: %s" % e)

    finally:
        if core_tmp_dir is not None and os.path.isdir(core_tmp_dir):
            shutil.rmtree(core_tmp_dir)