blob: b966ff01c2615a134862baa9ee6f9dd129cfe366 [file] [log] [blame]
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -08001#!/usr/bin/env python
2
import argparse
import ctypes
import os
import shlex
import subprocess
from time import sleep

from bcc import BPF
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -08009
class Time(object):
    """Read the raw monotonic clock through librt's clock_gettime."""

    # BPF timestamps come from the monotonic clock. To be able to filter
    # and compare them from Python, we need to invoke clock_gettime.
    # Adapted from http://stackoverflow.com/a/1205762
    CLOCK_MONOTONIC_RAW = 4  # see <linux/time.h>

    class timespec(ctypes.Structure):
        # Layout handed to clock_gettime: whole seconds plus nanoseconds.
        _fields_ = [('tv_sec', ctypes.c_long), ('tv_nsec', ctypes.c_long)]

    # use_errno=True lets ctypes.get_errno() report why a call failed.
    librt = ctypes.CDLL('librt.so.1', use_errno=True)
    clock_gettime = librt.clock_gettime
    clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]

    @staticmethod
    def monotonic_time():
        """Return the CLOCK_MONOTONIC_RAW reading in nanoseconds (float).

        Raises:
            OSError: if clock_gettime reports a failure; carries the errno
                and its message.
        """
        now = Time.timespec()
        status = Time.clock_gettime(Time.CLOCK_MONOTONIC_RAW,
                                    ctypes.pointer(now))
        if status == 0:
            return now.tv_sec * 1e9 + now.tv_nsec
        code = ctypes.get_errno()
        raise OSError(code, os.strerror(code))
Sasha Goldshteina7cc6c22016-02-07 12:03:54 -080034
class StackDecoder(object):
    """Turn raw call-stack addresses into readable frame descriptions.

    Kernel addresses are resolved through the ``ksym`` method of the BPF
    object passed in. User-mode addresses are resolved by hand: the
    containing binary comes from /proc/PID/maps and the containing function
    from the output of ``objdump -t`` on that binary.
    """

    def __init__(self, pid, bpf):
        """Remember the target and take an initial snapshot of its mappings.

        Args:
            pid: PID of the traced process, or -1 when there is none (no
                /proc lookup is performed in that case).
            bpf: object whose ``ksym(addr)`` resolves kernel addresses.
        """
        self.pid = pid
        self.bpf = bpf
        # binary path -> {symbol name: (start address, length)}
        self.ranges_cache = {}
        # binary path -> (start, end); stays empty when pid == -1
        self.code_ranges = {}
        self.refresh_code_ranges()

    def refresh_code_ranges(self):
        """Re-read the executable mappings of the traced process, if any."""
        if self.pid == -1:
            return
        self.code_ranges = self._get_code_ranges()

    @staticmethod
    def _is_binary_segment(parts):
        """Tell whether a split /proc/PID/maps line is file-backed code.

        Pseudo-mappings such as ``[vdso]`` or ``[stack]`` start with '[' and
        are rejected; the permissions field must contain 'x'.
        """
        return len(parts) == 6 and \
            parts[5][0] != '[' and 'x' in parts[1]

    def _get_code_ranges(self):
        """Return {binary path: (start, end)} for executable mappings."""
        ranges = {}
        # A typical line from /proc/PID/maps looks like this:
        # 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
        # We are looking for executable segments that have a .so file
        # or the main executable. The first field is the address range of
        # that memory segment, which we index by binary name.
        with open("/proc/%d/maps" % self.pid) as maps:
            for raw_range in maps:
                parts = raw_range.split()
                if not StackDecoder._is_binary_segment(parts):
                    continue
                start, end = parts[0].split('-')
                ranges[parts[5]] = (int(start, 16), int(end, 16))
        return ranges

    @staticmethod
    def _is_function_symbol(parts):
        """Tell whether a split ``objdump -t`` line is a .text function."""
        return len(parts) == 6 and parts[3] == ".text" \
            and parts[2] == "F"

    def _get_sym_ranges(self, binary):
        """Return {symbol: (start, length)} for a binary, cached per path."""
        if binary in self.ranges_cache:
            return self.ranges_cache[binary]
        sym_ranges = {}
        raw_symbols = run_command_get_output("objdump -t %s" % binary)
        for raw_symbol in raw_symbols:
            # The pipe yields bytes on Python 3; the comparisons below
            # are against text.
            if not isinstance(raw_symbol, str):
                raw_symbol = raw_symbol.decode("utf-8", "replace")
            # A typical line from objdump -t looks like this:
            # 00000000004007f5 g F .text 000000000000010e main
            # We only care about functions in the .text segment.
            # The first number is the start address, and the second
            # number is the length.
            parts = raw_symbol.split()
            if not StackDecoder._is_function_symbol(parts):
                continue
            sym_start = int(parts[0], 16)
            sym_len = int(parts[4], 16)
            sym_name = parts[5]
            sym_ranges[sym_name] = (sym_start, sym_len)
        self.ranges_cache[binary] = sym_ranges
        return sym_ranges

    def _decode_sym(self, binary, offset):
        """Return ``name+0xoff`` for the symbol covering offset, else hex."""
        sym_ranges = self._get_sym_ranges(binary)
        # Find the symbol that contains the specified offset.
        # There might not be one.
        for name, (start, length) in sym_ranges.items():
            # The end is exclusive: start + length is the first byte past
            # the symbol. A zero-length symbol still matches its own start.
            if start <= offset < start + max(length, 1):
                return "%s+0x%x" % (name, offset - start)
        return "%x" % offset

    def _decode_addr(self, addr):
        """Return ``symbol [binary]`` for a user-mode address, else hex.

        Uses the mappings captured by the most recent call to
        refresh_code_ranges() rather than re-reading /proc for every frame.
        """
        # Find the binary that contains the specified address.
        # For .so files, look at the relative address; for the main
        # executable, look at the absolute address.
        for binary, (start, end) in self.code_ranges.items():
            # The end address of a maps range is exclusive.
            if start <= addr < end:
                offset = addr - start \
                    if binary.endswith(".so") else addr
                return "%s [%s]" % (self._decode_sym(binary,
                                                     offset), binary)
        return "%x" % addr

    def decode_stack(self, info, is_kernel_trace):
        """Format the call stack stored in an allocation record.

        Args:
            info: record exposing ``num_frames`` and an indexable
                ``callstack`` of addresses.
            is_kernel_trace: resolve frames as kernel symbols when true,
                as user-mode symbols otherwise.

        Returns:
            One `` <symbol> (<hex addr>) ;`` fragment per frame, joined into
            a single string, or ``"???"`` when there are no frames.
        """
        if info.num_frames <= 0:
            return "???"
        frames = []
        for i in range(info.num_frames):
            addr = info.callstack[i]
            if is_kernel_trace:
                frames.append(" %s [kernel] (%x) ;" %
                              (self.bpf.ksym(addr), addr))
            else:
                # At some point, we hope to have native BPF
                # user-mode symbol decoding, but for now we
                # have to use our own.
                frames.append(" %s (%x) ;" %
                              (self._decode_addr(addr), addr))
        return "".join(frames)
Sasha Goldshtein29228612016-02-07 12:20:19 -0800135
def run_command_get_output(command):
    """Run a command and lazily iterate over its output lines.

    The command string is split on whitespace and executed directly (no
    shell); stderr is merged into stdout. The process is started
    immediately, but its output is only read as the result is iterated.

    Args:
        command: the command line to execute.

    Returns:
        An iterator over the raw lines read from the child's output pipe.
        When the output is exhausted, or the iterator is closed after it
        has been started, the pipe is closed and the child is waited for
        so that it does not linger as a zombie.
    """
    p = subprocess.Popen(command.split(),
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    def output_lines():
        try:
            # readline returns an empty string at end of file.
            for line in iter(p.stdout.readline, b''):
                yield line
        finally:
            p.stdout.close()
            p.wait()

    return output_lines()
Sasha Goldshtein29228612016-02-07 12:20:19 -0800140
def run_command_get_pid(command):
    """Start a command in the background and return its process ID.

    The command string is tokenised with shell-like quoting rules, so
    quoted arguments containing spaces stay intact. It is executed
    directly, not through a shell. The child is not waited for.

    Args:
        command: the command line to execute.

    Returns:
        The PID of the newly started process.
    """
    p = subprocess.Popen(shlex.split(command))
    return p.pid
Sasha Goldshtein751fce52016-02-08 02:57:02 -0800144
# Usage examples; passed to argparse as the epilog so they follow the
# option list in the --help output.
examples = """
EXAMPLES:

./memleak.py -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak.py -p $(pidof allocs) -t
        Trace allocations and display each individual call to malloc/free
./memleak.py -p $(pidof allocs) -a -i 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak.py -c "./allocs"
        Run the specified command and trace its allocations
./memleak.py
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak.py -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with malloc/free and kernel-mode
allocations made with kmalloc/kfree.
"""

# RawDescriptionHelpFormatter keeps the line breaks of the description and
# the examples exactly as written above.
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# Numeric options are declared with type=int so that a malformed value is
# rejected by argparse with a usage message instead of failing later.
parser.add_argument("-p", "--pid", type=int,
    help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
    help="print trace messages for each alloc/free call")
parser.add_argument("-i", "--interval", default=5, type=int,
    help="interval in seconds to print outstanding allocations")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
    help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
    help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
    help="execute and trace the specified command")
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800187
args = parser.parse_args()

# -1 stands for "no PID given".
pid = -1 if args.pid is None else int(args.pid)
command = args.command
# Kernel mode is chosen only when neither a PID nor a command was supplied.
# This is decided before pid is overwritten with the launched command's PID.
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = int(args.interval)
# --older is given in milliseconds; timestamps are compared in nanoseconds.
min_age_ns = 1e6 * int(args.older)

if command is not None:
    print("Executing '%s' and tracing the resulting process." % command)
    pid = run_command_get_pid(command)

# The BPF program text is read from memleak.c in the current working
# directory; close the file as soon as it has been read.
with open("memleak.c") as source_file:
    bpf_source = source_file.read()
# SHOULD_PRINT is a placeholder in the C source, replaced by 1 or 0
# depending on whether per-call tracing was requested.
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")

bpf_program = BPF(text=bpf_source)

if not kernel_trace:
    print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
    # name="c" presumably selects libc - TODO confirm against the bcc API.
    bpf_program.attach_uprobe(name="c", sym="malloc",
                              fn_name="alloc_enter", pid=pid)
    bpf_program.attach_uretprobe(name="c", sym="malloc",
                                 fn_name="alloc_exit", pid=pid)
    bpf_program.attach_uprobe(name="c", sym="free",
                              fn_name="free_enter", pid=pid)
else:
    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = StackDecoder(pid, bpf_program)
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800221
def print_outstanding():
    """Print the outstanding allocations, grouped by call stack.

    Reads the "allocs" table of the BPF program, skips entries whose
    timestamp is less than min_age_ns old, and prints, for every distinct
    decoded stack, the total bytes and the number of allocations, in
    ascending order of total bytes. When --show-allocs was given, each
    individual address and size is printed as well.
    """
    stacks = {}
    print("*** Outstanding allocations:")
    allocs = bpf_program.get_table("allocs")
    entries = sorted(allocs.items(), key=lambda a: a[1].size)
    # Read the clock once so that every entry is judged against the same
    # cut-off, rather than one that drifts while stacks are being decoded.
    newest_allowed_ns = Time.monotonic_time() - min_age_ns
    for address, info in entries:
        if newest_allowed_ns < info.timestamp_ns:
            continue
        stack = decoder.decode_stack(info, kernel_trace)
        count, size = stacks.get(stack, (0, 0))
        stacks[stack] = (count + 1, size + info.size)
        if args.show_allocs:
            print("\taddr = %x size = %s" %
                  (address.value, info.size))
    for stack, (count, size) in sorted(stacks.items(),
                                       key=lambda s: s[1][1]):
        # decode_stack separates frames with ';'; put each on its own line.
        print("\t%d bytes in %d allocations from stack\n\t\t%s" %
              (size, count, stack.replace(";", "\n\t\t")))
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800242
# Main loop: either stream the BPF trace output, or periodically print a
# summary of outstanding allocations. Runs until interrupted.
try:
    while True:
        if trace_all:
            # Function-call form of print works on both Python 2 and 3
            # and prints the same text for a single argument.
            print(bpf_program.trace_fields())
        else:
            sleep(interval)
            # Pick up mappings added since the last report before
            # decoding stacks.
            decoder.refresh_code_ranges()
            print_outstanding()
except KeyboardInterrupt:
    # Ctrl+C is the documented way to quit; exit without a traceback
    # regardless of which branch was running.
    raise SystemExit