Sasha Goldshtein | 4f1ea67 | 2016-02-07 01:57:42 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | from bcc import BPF |
| 4 | from time import sleep |
| 5 | import argparse |
| 6 | import subprocess |
Sasha Goldshtein | cfce311 | 2016-02-07 11:09:36 -0800 | [diff] [blame] | 7 | import ctypes |
| 8 | import os |
Sasha Goldshtein | 4f1ea67 | 2016-02-07 01:57:42 -0800 | [diff] [blame] | 9 | |
class Time(object):
    """Read the clock that BPF allocation timestamps are compared against.

    BPF timestamps come from the monotonic clock. To be able to filter
    and compare them from Python, we need to invoke clock_gettime through
    ctypes. Adapted from http://stackoverflow.com/a/1205762
    """

    # bpf_ktime_get_ns() is documented as equivalent to
    # clock_gettime(CLOCK_MONOTONIC), so that is the clock to read here.
    # CLOCK_MONOTONIC_RAW is not NTP-slewed and drifts away from it.
    CLOCK_MONOTONIC = 1         # see <linux/time.h>
    # No longer used by monotonic_time(); kept so existing references to
    # Time.CLOCK_MONOTONIC_RAW keep working.
    CLOCK_MONOTONIC_RAW = 4     # see <linux/time.h>

    class timespec(ctypes.Structure):
        # Mirrors struct timespec: whole seconds plus nanoseconds.
        _fields_ = [
            ('tv_sec', ctypes.c_long),
            ('tv_nsec', ctypes.c_long),
        ]

    # Older glibc keeps clock_gettime in librt; glibc 2.17 and later also
    # export it from libc. Fall back to the symbols already loaded into
    # this process when librt.so.1 is not installed.
    try:
        librt = ctypes.CDLL('librt.so.1', use_errno=True)
    except OSError:
        librt = ctypes.CDLL(None, use_errno=True)
    clock_gettime = librt.clock_gettime
    clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
    clock_gettime.restype = ctypes.c_int

    @staticmethod
    def monotonic_time():
        """Return the current CLOCK_MONOTONIC reading in nanoseconds.

        The value is a float (seconds * 1e9 + nanoseconds).
        Raises OSError carrying the C errno if clock_gettime fails.
        """
        t = Time.timespec()
        if Time.clock_gettime(Time.CLOCK_MONOTONIC, ctypes.byref(t)) != 0:
            errno_ = ctypes.get_errno()
            raise OSError(errno_, os.strerror(errno_))
        return t.tv_sec * 1e9 + t.tv_nsec
| 33 | |
class StackDecoder(object):
    """Turn raw call-stack addresses into printable symbol names.

    Kernel addresses are resolved through the BPF object's ksym(). User-mode
    addresses are resolved by matching them against the executable mappings
    listed in /proc/PID/maps and the function symbols printed by
    `objdump -t` for the matching binary.
    """

    def __init__(self, pid, bpf):
        """Create a decoder for process `pid` (-1 means no user process).

        `bpf` is the object whose ksym() is used for kernel addresses.
        """
        self.pid = pid
        self.bpf = bpf
        # binary path -> {symbol name: (start address, length)}
        self.ranges_cache = {}
        # binary path -> (mapping start, mapping end); stays empty when
        # pid is -1 so lookups never hit a missing attribute.
        self.code_ranges = {}
        self.refresh_code_ranges()

    def refresh_code_ranges(self):
        """Re-read the executable mappings of the traced process.

        Does nothing when no process is being traced (pid == -1).
        """
        if self.pid == -1:
            return
        self.code_ranges = self._get_code_ranges()

    def _get_code_ranges(self):
        """Return {binary path: (start, end)} parsed from /proc/PID/maps."""
        ranges = {}
        with open("/proc/%d/maps" % self.pid) as maps:
            for raw_range in maps:
                # A typical line from /proc/PID/maps looks like this:
                # 7f21b6635000-7f21b67eb000 r-xp 00000000 fd:00 1442606 /usr/lib64/libc-2.21.so
                # We are looking for executable segments that have a binary
                # (.so or the main executable). The first field is the
                # address range of that segment, which we index by binary
                # name.
                parts = raw_range.split()
                if len(parts) < 6 or parts[5][0] == '[' or 'x' not in parts[1]:
                    continue
                start, end = parts[0].split('-')
                ranges[parts[5]] = (int(start, 16), int(end, 16))
        return ranges

    def _get_sym_ranges(self, binary):
        """Return {symbol name: (start, length)} for `binary`, cached."""
        if binary in self.ranges_cache:
            return self.ranges_cache[binary]
        sym_ranges = {}
        raw_symbols = run_command_get_output("objdump -t %s" % binary)
        for raw_symbol in raw_symbols:
            # The command output may arrive as bytes on Python 3; compare
            # against text so the filter below can match.
            if not isinstance(raw_symbol, str):
                raw_symbol = raw_symbol.decode("utf-8", "replace")
            # A typical line from objdump -t looks like this:
            # 00000000004007f5 g F .text 000000000000010e main
            # We only care about functions (F) in the .text segment. The
            # first number is the start address, and the second number is
            # the length.
            parts = raw_symbol.split()
            if len(parts) < 6 or parts[3] != ".text" or parts[2] != "F":
                continue
            # The name is the last field: objdump may print a visibility
            # marker such as ".hidden" in front of it.
            sym_ranges[parts[-1]] = (int(parts[0], 16), int(parts[4], 16))
        self.ranges_cache[binary] = sym_ranges
        return sym_ranges

    @staticmethod
    def _is_shared_object(binary):
        """Tell whether `binary` is named like a shared library.

        Accepts both "libfoo.so" and versioned names such as "libfoo.so.1".
        """
        name = os.path.basename(binary)
        return name.endswith(".so") or ".so." in name

    def _decode_sym(self, binary, offset):
        """Return "name+0xOFF" for the symbol containing `offset`.

        Falls back to the bare hexadecimal offset when no symbol matches.
        """
        sym_ranges = self._get_sym_ranges(binary)
        # Find the symbol that contains the specified offset. There might
        # not be one. A symbol covers [start, start + length); an exact hit
        # on the start address also counts so zero-length symbols resolve.
        for name, (start, length) in sym_ranges.items():
            if offset == start or start <= offset < start + length:
                return "%s+0x%x" % (name, offset - start)
        return "%x" % offset

    def _decode_addr(self, addr):
        """Return "symbol [binary]" for a user-mode address.

        Uses the mappings captured by the last refresh_code_ranges() call;
        falls back to the bare hexadecimal address when nothing matches.
        """
        # Find the binary that contains the specified address. For .so
        # files, look at the relative address; for the main executable,
        # look at the absolute address. The end of a mapping is exclusive.
        for binary, (start, end) in self.code_ranges.items():
            if start <= addr < end:
                if self._is_shared_object(binary):
                    offset = addr - start
                else:
                    offset = addr
                return "%s [%s]" % (self._decode_sym(binary, offset), binary)
        return "%x" % addr

    def decode_stack(self, info, is_kernel_trace):
        """Format the call stack stored in `info` as one string.

        `info` provides `num_frames` and an indexable `callstack`. Each
        frame is rendered as " <symbol> (<hex address>) ;". Returns "???"
        when there are no frames.
        """
        if info.num_frames <= 0:
            return "???"
        frames = []
        for i in range(info.num_frames):
            addr = info.callstack[i]
            if is_kernel_trace:
                frames.append(" %s [kernel] (%x) ;" %
                              (self.bpf.ksym(addr), addr))
            else:
                # At some point, we hope to have native BPF user-mode symbol
                # decoding, but for now we have to use our own
                frames.append(" %s (%x) ;" % (self._decode_addr(addr), addr))
        return "".join(frames)
| 116 | |
def run_command_get_output(command):
    """Run `command` and return an iterator over its output lines.

    `command` is split on whitespace to form the argument list. stderr is
    merged into stdout. Each yielded line is text and keeps its trailing
    newline. The child is waited for and its pipe closed before returning,
    so no process or file descriptor is left behind.
    """
    p = subprocess.Popen(command.split(), stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    raw_output, _ = p.communicate()
    lines = raw_output.splitlines(True)
    # On Python 3 the pipe yields bytes; hand callers text so comparisons
    # against string literals work. On Python 2 the lines are already str.
    if not isinstance(raw_output, str):
        lines = [line.decode("utf-8", "replace") for line in lines]
    return iter(lines)
| 120 | |
def run_command_get_pid(command):
    """Start `command` in the background and return the child's PID.

    The command line is split with shell-like quoting rules, so an argument
    such as 'two words' stays a single argument. The child is not waited
    for; it keeps running while it is traced.
    """
    import shlex
    p = subprocess.Popen(shlex.split(command))
    return p.pid
| 124 | |
# Usage examples. Passed to argparse as the epilog, so this text is printed
# as-is at the end of the --help output.
examples = """
EXAMPLES:

./memleak.py -p $(pidof allocs)
Trace allocations and display a summary of "leaked" (outstanding)
allocations every 5 seconds
./memleak.py -p $(pidof allocs) -t
Trace allocations and display each individual call to malloc/free
./memleak.py -p $(pidof allocs) -a -i 10
Trace allocations and display allocated addresses, sizes, and stacks
every 10 seconds for outstanding allocations
./memleak.py -c "./allocs"
Run the specified command and trace its allocations
./memleak.py
Trace allocations in kernel mode and display a summary of outstanding
allocations every 5 seconds
./memleak.py -o 60000
Trace allocations in kernel mode and display a summary of outstanding
allocations that are at least one minute (60 seconds) old
"""

# One-paragraph tool summary. Passed to argparse as the description shown at
# the top of the --help output.
description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with malloc/free and kernel-mode
allocations made with kmalloc/kfree.
"""
| 151 | |
# Command-line interface. The raw formatter keeps the line breaks of the
# description and the examples epilog exactly as written.
parser = argparse.ArgumentParser(description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# Numeric options are declared with type=int so that argparse rejects bad
# values with a usage message instead of a traceback later on.
parser.add_argument("-p", "--pid", type=int,
    help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
    help="print trace messages for each alloc/free call")
parser.add_argument("-i", "--interval", type=int, default=5,
    help="interval in seconds to print outstanding allocations")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
    help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", type=int, default=500,
    help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
    help="execute and trace the specified command")

args = parser.parse_args()

# -1 means "no user process"; kernel allocations are traced only when neither
# a PID nor a command was given.
pid = -1 if args.pid is None else args.pid
command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
# --older is given in milliseconds; timestamps are compared in nanoseconds.
min_age_ns = 1e6 * args.older

if command is not None:
    print("Executing '%s' and tracing the resulting process." % command)
    pid = run_command_get_pid(command)

# Load the BPF program text and substitute the SHOULD_PRINT placeholder
# according to the --trace flag.
with open("memleak.c") as bpf_file:
    bpf_source = bpf_file.read()
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")

bpf_program = BPF(text=bpf_source)

# alloc_enter / alloc_exit / free_enter are the handler names attached below;
# presumably they are defined in memleak.c -- not visible from this file.
if not kernel_trace:
    print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
    bpf_program.attach_uprobe(name="c", sym="malloc",
                              fn_name="alloc_enter", pid=pid)
    bpf_program.attach_uretprobe(name="c", sym="malloc",
                                 fn_name="alloc_exit", pid=pid)
    bpf_program.attach_uprobe(name="c", sym="free",
                              fn_name="free_enter", pid=pid)
else:
    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = StackDecoder(pid, bpf_program)
Sasha Goldshtein | 4f1ea67 | 2016-02-07 01:57:42 -0800 | [diff] [blame] | 198 | |
def print_outstanding():
    """Print a per-call-stack summary of allocations that are still live.

    Reads the "allocs" table of the BPF program, skips entries younger than
    min_age_ns, and groups the rest by decoded call stack. With
    --show-allocs each individual address and size is printed as well.
    Stacks are listed in ascending order of total outstanding bytes.
    """
    stacks = {}
    print("*** Outstanding allocations:")
    allocs = bpf_program.get_table("allocs")
    # Read the clock once so every entry is judged against the same cutoff
    # instead of calling into clock_gettime for each allocation.
    newest_allowed_ns = Time.monotonic_time() - min_age_ns
    for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
        if newest_allowed_ns < info.timestamp_ns:
            continue
        stack = decoder.decode_stack(info, kernel_trace)
        # stacks maps decoded stack -> (allocation count, total bytes)
        count, size = stacks.get(stack, (0, 0))
        stacks[stack] = (count + 1, size + info.size)
        if args.show_allocs:
            print("\taddr = %x size = %s" % (address.value, info.size))
    for stack, (count, size) in sorted(stacks.items(), key=lambda s: s[1][1]):
        # Frames are separated by ";" in the decoded stack; put each frame
        # on its own indented line.
        print("\t%d bytes in %d allocations from stack\n\t\t%s" %
              (size, count, stack.replace(";", "\n\t\t")))
Sasha Goldshtein | 4f1ea67 | 2016-02-07 01:57:42 -0800 | [diff] [blame] | 213 | |
# Main loop. In --trace mode, print each trace record as it arrives;
# otherwise wake up every `interval` seconds, refresh the process mappings
# and print the outstanding allocations. Ctrl+C anywhere in the loop ends
# the tool quietly instead of producing a traceback.
try:
    while True:
        if trace_all:
            print(bpf_program.trace_fields())
        else:
            sleep(interval)
            decoder.refresh_code_ranges()
            print_outstanding()
except KeyboardInterrupt:
    exit()
| 224 | |