lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | """ |
| 3 | Simple crash handling application for autotest |
| 4 | |
| 5 | @copyright Red Hat Inc 2009 |
| 6 | @author Lucas Meneghel Rodrigues <lmr@redhat.com> |
| 7 | """ |
import commands
import glob
import os
import random
import re
import shutil
import string
import subprocess
import sys
import syslog
import time
| 9 | |
| 10 | |
def generate_random_string(length):
    """
    Return a random string using alphanumeric characters.

    Characters are drawn from the ASCII letters and digits only, so the result
    does not depend on the current locale and is safe to use in file names.

    @param length: length of the string that will be generated. Values lower
            than 1 produce an empty string.
    @return: String with the requested number of random characters.
    """
    r = random.SystemRandom()
    # string.ascii_letters (unlike string.letters) is locale independent
    chars = string.ascii_letters + string.digits
    return "".join(r.choice(chars) for _ in range(length))
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 24 | |
| 25 | |
def get_parent_pid(pid):
    """
    Returns the parent PID for a given PID, converted to an integer.

    The information is read from /proc/<pid>/stat.

    @param pid: Process ID.
    @return: Parent PID as an integer, or 1 if it could not be determined.
    """
    try:
        stat_file = open('/proc/%s/stat' % pid)
        try:
            stat = stat_file.read()
        finally:
            stat_file.close()
        # The second field (the command name) is wrapped in parentheses and
        # may itself contain spaces or parentheses, so a plain split() can
        # pick the wrong column. Everything after the last ')' is
        # "<state> <ppid> ...".
        ppid = int(stat.rsplit(')', 1)[1].split()[1])
    except (IOError, OSError, IndexError, ValueError):
        # It is not possible to determine the parent because the process
        # already left the process table (or the stat file is unreadable).
        ppid = 1

    return ppid
| 40 | |
| 41 | |
def write_to_file(filename, data, report=False):
    """
    Store data in the file at the given path. The file is created if it does
    not exist and truncated if it does.

    @param filename: Path of the file to be written.
    @param data: File contents.
    @param report: Whether gdb_report() will be called on the file after it
            was written, to get a backtrace report of it.
    @return: The filename that was passed in.
    """
    out_file = open(filename, 'w')
    try:
        out_file.write(data)
    finally:
        # Close the file even when the write fails
        out_file.close()

    if not report:
        return filename

    gdb_report(filename)
    return filename
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 62 | |
| 63 | |
def get_results_dir_list(pid, core_dir_basename):
    """
    Get all valid output directories for the core file and the report. It works
    by inspecting files created by each test on /tmp and verifying if the
    PID of the process that crashed is a child or grandchild of the autotest
    test process. If it can't find any relationship (maybe a daemon that died
    during a test execution), it will write the core file to the debug dirs
    of all tests currently being executed. If there are no active autotest
    tests at a particular moment, it will return a list with a single
    directory under /tmp.

    @param pid: PID for the process that generated the core. May be an
            integer, a numeric string, or None when it is unknown.
    @param core_dir_basename: Basename for the directory that will hold both
            the core dump and the crash report.
    @return: List of directory paths, never empty.
    """
    pid_dir_dict = {}
    for debugdir_file in glob.glob("/tmp/autotest_results_dir.*"):
        try:
            # splitext() returns the extension with its leading dot
            # (".1234"). Store the PID as an integer, otherwise the lookups
            # below can never match.
            a_pid = int(os.path.splitext(debugdir_file)[1].lstrip('.'))
            results_file = open(debugdir_file)
            try:
                results_dir = results_file.read().strip()
            finally:
                results_file.close()
        except (IOError, OSError, ValueError):
            # Not a <name>.<pid> file, or it vanished/is unreadable: ignore it
            continue
        pid_dir_dict[a_pid] = os.path.join(results_dir, core_dir_basename)

    # The PID may be given as a string; compare PIDs as integers. If it is
    # missing or not a number, treat it as unknown.
    try:
        pid = int(pid)
    except (TypeError, ValueError):
        pid = None

    results_dir_list = []
    if pid is not None:
        # Walk up the process tree collecting the directories of every
        # ancestor that registered a results dir.
        while pid > 1:
            if pid in pid_dir_dict:
                results_dir_list.append(pid_dir_dict[pid])
            pid = get_parent_pid(pid)

    # If a bug occurs and we can't grab the PID for the process that died, or
    # no related test was found, return all directories available and write
    # to all of them. With no directories at all, fall back to /tmp.
    return (results_dir_list or
            list(pid_dir_dict.values()) or
            [os.path.join("/tmp", core_dir_basename)])
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 98 | |
| 99 | |
def get_info_from_core(path):
    """
    Reads a core file and extracts a dictionary with useful core information.

    Right now, the only information extracted is the full executable name.

    @param path: Path to core file.
    @return: Dictionary with the key 'full_exe_path', holding the path of the
            executable that generated the core, or None if it could not be
            determined.
    """
    full_exe_path = None
    try:
        # Arguments are passed as a list, so no shell parses the core path
        # (which may contain spaces or shell metacharacters). '-batch' makes
        # gdb exit after loading the core instead of waiting for commands.
        gdb = subprocess.Popen(['gdb', '-c', path, '-batch'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
        output = gdb.communicate()[0]
    except OSError:
        # gdb is not available or could not be executed
        output = ""

    path_pattern = re.compile("Core was generated by `([^\0]+)'", re.IGNORECASE)
    for m in path_pattern.findall(output):
        # Sometimes the command line args come with the core, so get rid of them
        m = m.split(" ")[0]
        if os.path.isfile(m):
            full_exe_path = m
            break

    if full_exe_path is None:
        syslog.syslog("Could not determine from which application core file %s "
                      "is from" % path)

    return {'full_exe_path': full_exe_path}
lmr | 95ef4f6 | 2009-09-29 17:30:43 +0000 | [diff] [blame] | 124 | |
| 125 | |
def gdb_report(path):
    """
    Use GDB to produce a report with information about a given core.

    The report is written to a file named 'report' in the directory of the
    core file. Besides the program name and its backtrace, it includes the
    PID, signal, hostname and crash time whenever the module level variables
    crashed_pid, signal, hostname and crash_time are set (they are set when
    this file runs as a script). Missing variables are simply left out.

    @param path: Path to core file.
    """
    # Get full command path
    exe_path = get_info_from_core(path)['full_exe_path']
    basedir = os.path.dirname(path)
    gdb_command_path = os.path.join(basedir, 'gdb_cmd')

    if exe_path is not None:
        # Write a command file for GDB
        write_to_file(gdb_command_path, 'bt full\n')

        # Take a backtrace from the core. Arguments are passed as a list, so
        # no shell parses the paths (they may contain spaces or shell
        # metacharacters).
        gdb_cmd = ['gdb', '-e', exe_path, '-c', path, '-x', gdb_command_path,
                   '-n', '-batch', '-quiet']
        try:
            gdb = subprocess.Popen(gdb_cmd, stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
            backtrace = gdb.communicate()[0]
            # Sanitize output before passing it to the report: drop invalid
            # byte sequences, then encode back so the report stays a byte
            # string (writing non ASCII unicode to the report file would
            # fail).
            backtrace = backtrace.decode('utf-8', 'ignore').encode('utf-8')
        except OSError:
            # gdb is not available or could not be executed
            backtrace = ("Could not determine backtrace for core file %s" %
                         path)
    else:
        exe_path = "Unknown"
        backtrace = ("Could not determine backtrace for core file %s" % path)

    # Crash details live in module level variables that only exist when this
    # file runs as a script; look them up defensively.
    module_vars = globals()

    # Composing the report
    report = "Program: %s\n" % exe_path
    for label, var_name in (("PID", 'crashed_pid'),
                            ("Signal", 'signal'),
                            ("Hostname", 'hostname')):
        value = module_vars.get(var_name)
        if value is not None:
            report += "%s: %s\n" % (label, value)

    crash_time = module_vars.get('crash_time')
    if crash_time is not None:
        try:
            crash_time = time.ctime(float(crash_time))
        except (TypeError, ValueError):
            # Not a timestamp; report the raw value rather than failing
            pass
        report += ("Time of the crash (according to kernel): %s\n" %
                   crash_time)
    report += "Program backtrace:\n%s\n" % backtrace

    report_path = os.path.join(basedir, 'report')
    write_to_file(report_path, report)
| 167 | |
| 168 | |
def write_cores(core_data, dir_list):
    """
    Write core files to all directories, generating a report for each of them.

    Each directory is created if it does not exist yet. The core is written
    to a file named 'core' inside it.

    @param core_data: Contents of the core file.
    @param dir_list: List of directories the cores have to be written.
    """
    syslog.syslog("Writing core files to %s" % (dir_list,))
    for result_dir in dir_list:
        if not os.path.isdir(result_dir):
            os.makedirs(result_dir)
        core_path = os.path.join(result_dir, 'core')
        # Write the data handed in by the caller, not a module level variable
        write_to_file(core_path, core_data, report=True)
| 183 | |
| 184 | |
if __name__ == "__main__":
    syslog.openlog('AutotestCrashHandler', 0, syslog.LOG_DAEMON)
    try:
        # The crash details are kept in module level variables, because
        # gdb_report() reads them from there when composing the report.
        try:
            crashed_pid, crash_time, uid, signal, hostname, exe = sys.argv[1:]
        except ValueError:
            # Probably due to a kernel bug, we can't exactly map the
            # parameters passed to this script. So we have to reduce the
            # functionality of the script (just write the core at a fixed
            # place).
            syslog.syslog("Unable to unpack parameters passed to the "
                          "script. Operating with limited functionality.")
            crashed_pid = crash_time = uid = signal = hostname = exe = None
            core_dir_name = 'core.%s' % generate_random_string(4)
        else:
            core_dir_name = 'crash.%s.%s' % (exe, crashed_pid)

        # Get the filtered results dir list
        results_dir_list = get_results_dir_list(crashed_pid, core_dir_name)

        # The core file contents are piped to this script on stdin
        core_file = sys.stdin.read()

        if exe is not None and crashed_pid is not None:
            syslog.syslog("Application %s, PID %s crashed" % (exe, crashed_pid))

        # Write the core file to the appropriate directories
        write_cores(core_file, results_dir_list)

    except Exception:
        # Nothing sensible can be done about a failure here besides logging it
        syslog.syslog("Crash handler had a problem: %s" % sys.exc_info()[1])