#!/usr/bin/env python
#
# memleak.py    Trace and display outstanding allocations to detect
#               memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak.py [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                   [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
#                   [-Z MAX_SIZE]
#                   [interval] [count]
#
# Copyright (C) 2016 Sasha Goldshtein.

import argparse
import ctypes
import os
import shlex
import subprocess
from datetime import datetime
from time import sleep

from bcc import BPF

class Time(object):
    """Read the monotonic clock through clock_gettime(2).

    BPF timestamps come from the monotonic clock. To be able to filter
    and compare them from Python, we need to invoke clock_gettime.
    Adapted from http://stackoverflow.com/a/1205762
    """

    # Clock id handed to clock_gettime; see <linux/time.h>.
    # NOTE(review): the BPF side is not visible here. If it stamps
    # allocations with a CLOCK_MONOTONIC-based helper, the RAW clock may
    # drift slightly from it -- confirm this is the intended clock.
    CLOCK_MONOTONIC_RAW = 4

    class timespec(ctypes.Structure):
        # Two-field layout (seconds, nanoseconds) filled in by
        # clock_gettime.
        _fields_ = [
            ('tv_sec', ctypes.c_long),
            ('tv_nsec', ctypes.c_long),
        ]

    # use_errno=True lets ctypes.get_errno() report why a call failed.
    librt = ctypes.CDLL('librt.so.1', use_errno=True)
    clock_gettime = librt.clock_gettime
    clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]

    @staticmethod
    def monotonic_time():
        """Return the current monotonic time in nanoseconds (as a float).

        Raises OSError carrying the C errno if clock_gettime fails.
        """
        ts = Time.timespec()
        # byref() is the lightweight way to pass an out-parameter.
        rc = Time.clock_gettime(Time.CLOCK_MONOTONIC_RAW, ctypes.byref(ts))
        if rc != 0:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err))
        return ts.tv_sec * 1e9 + ts.tv_nsec

class StackDecoder(object):
    """Translate captured call-stack addresses into readable frames.

    Kernel addresses are resolved through the BPF object's ksym();
    user-mode addresses are resolved by combining /proc/PID/maps with
    the symbol tables printed by `objdump -t`.
    """

    def __init__(self, pid, bpf):
        """Remember the target pid (-1 means no user process) and BPF object."""
        self.pid = pid
        self.bpf = bpf
        # binary path -> {symbol name: (start, length)}
        self.ranges_cache = {}
        # binary path -> (start, end); stays empty when pid is -1.
        self.code_ranges = {}
        self.refresh_code_ranges()

    def refresh_code_ranges(self):
        """Re-read the executable mappings of the traced process."""
        if self.pid == -1:
            return
        self.code_ranges = self._get_code_ranges()

    @staticmethod
    def _is_binary_segment(parts):
        # Six fields, a path that is not a pseudo-mapping such as
        # [vdso], and the executable permission bit set.
        return len(parts) == 6 and \
            parts[5][0] != '[' and 'x' in parts[1]

    def _get_code_ranges(self):
        """Return {binary path: (start, end)} for executable mappings."""
        ranges = {}
        # A typical line from /proc/PID/maps looks like this:
        # 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
        # We are looking for executable segments that have a .so file
        # or the main executable. The first two numbers are the range of
        # that memory segment, which we index by binary name.
        with open("/proc/%d/maps" % self.pid) as maps_file:
            for raw_range in maps_file:
                parts = raw_range.split()
                if not StackDecoder._is_binary_segment(parts):
                    continue
                start_text, end_text = parts[0].split('-')
                ranges[parts[5]] = (int(start_text, 16), int(end_text, 16))
        return ranges

    @staticmethod
    def _is_function_symbol(parts):
        return len(parts) == 6 and parts[3] == ".text" \
            and parts[2] == "F"

    @staticmethod
    def _is_shared_object(binary):
        # Accept both "libfoo.so" and versioned names like "libc.so.6".
        return binary.endswith(".so") or ".so." in binary

    def _get_sym_ranges(self, binary):
        """Return {symbol: (start, length)} for binary, cached per path."""
        if binary in self.ranges_cache:
            return self.ranges_cache[binary]
        sym_ranges = {}
        for raw_symbol in run_command_get_output("objdump -t %s" % binary):
            # Subprocess output is bytes on Python 3; the field
            # comparisons below need text.
            if not isinstance(raw_symbol, str):
                raw_symbol = raw_symbol.decode("utf-8", "replace")
            # A typical line from objdump -t looks like this:
            # 00000000004007f5 g F .text 000000000000010e main
            # We only care about functions in the .text segment.
            # The first number is the start address, and the second
            # number is the length.
            parts = raw_symbol.split()
            if not StackDecoder._is_function_symbol(parts):
                continue
            sym_ranges[parts[5]] = (int(parts[0], 16), int(parts[4], 16))
        self.ranges_cache[binary] = sym_ranges
        return sym_ranges

    def _decode_sym(self, binary, offset):
        """Return "name+0xoff" for the symbol containing offset, else hex."""
        sym_ranges = self._get_sym_ranges(binary)
        # Find the symbol that contains the specified offset.
        # There might not be one. The range is half-open: the byte at
        # start + length already belongs to whatever follows.
        for name, (start, length) in sym_ranges.items():
            if start <= offset < start + length:
                return "%s+0x%x" % (name, offset - start)
        return "%x" % offset

    def _decode_addr(self, addr):
        """Return "symbol [binary]" for addr, or its hex value if unmapped."""
        # Use the ranges cached by refresh_code_ranges() instead of
        # re-reading /proc/PID/maps for every single frame.
        # Find the binary that contains the specified address.
        # For .so files, look at the relative address; for the main
        # executable, look at the absolute address.
        for binary, (start, end) in self.code_ranges.items():
            # Mapping ranges exclude their end address.
            if start <= addr < end:
                if StackDecoder._is_shared_object(binary):
                    offset = addr - start
                else:
                    offset = addr
                return "%s [%s]" % (self._decode_sym(binary, offset), binary)
        return "%x" % addr

    def decode_stack(self, info, is_kernel_trace):
        """Render info.callstack[:info.num_frames] as ';'-separated frames.

        Returns "???" when no frames were captured.
        """
        if info.num_frames <= 0:
            return "???"
        frames = []
        for i in range(info.num_frames):
            addr = info.callstack[i]
            if is_kernel_trace:
                frames.append(" %s [kernel] (%x) ;" %
                              (self.bpf.ksym(addr), addr))
            else:
                # At some point, we hope to have native BPF
                # user-mode symbol decoding, but for now we
                # have to use our own.
                frames.append(" %s (%x) ;" %
                              (self._decode_addr(addr), addr))
        return "".join(frames)

def run_command_get_output(command):
    """Run command and return an iterator over its output lines.

    command is a whitespace-separated command line. stderr is merged
    into stdout. Lines are returned as read from the pipe, with their
    line endings kept.
    """
    p = subprocess.Popen(command.split(),
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # communicate() drains the pipe, closes it and reaps the child, so
    # neither a file descriptor nor a zombie process is left behind.
    output, _ = p.communicate()
    return iter(output.splitlines(True))

def run_command_get_pid(command):
    """Start command in the background and return the child's pid.

    The command line is tokenized with shell-like rules, so quoted
    arguments containing spaces reach the child as a single argument.
    """
    p = subprocess.Popen(shlex.split(command))
    return p.pid

# Epilog shown after the option list in --help output.
examples = """
EXAMPLES:

./memleak.py -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak.py -p $(pidof allocs) -t
        Trace allocations and display each individual call to malloc/free
./memleak.py -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak.py -c "./allocs"
        Run the specified command and trace its allocations
./memleak.py
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak.py -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak.py -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

# One-paragraph summary shown at the top of --help output.
description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with malloc/free and kernel-mode
allocations made with kmalloc/kfree.
"""

# Command-line interface. RawDescriptionHelpFormatter keeps the line
# breaks of the description and the examples epilog intact.
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
    help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
    help="print trace messages for each alloc/free call")
parser.add_argument("interval", nargs="?", default=5, type=int,
    help="interval in seconds to print outstanding allocations")
parser.add_argument("count", nargs="?", type=int,
    help="number of times to print the report before exiting")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
    help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
    help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
    help="execute and trace the specified command")
parser.add_argument("-s", "--sample-rate", default=1, type=int,
    help="sample every N-th allocation to decrease the overhead")
parser.add_argument("-d", "--stack-depth", default=10, type=int,
    help="maximum stack depth to capture")
parser.add_argument("-T", "--top", type=int, default=10,
    help="display only this many top allocating stacks (by size)")
parser.add_argument("-z", "--min-size", type=int,
    help="capture only allocations larger than this size")
parser.add_argument("-Z", "--max-size", type=int,
    help="capture only allocations smaller than this size")

args = parser.parse_args()

pid = args.pid
command = args.command
# With neither a pid nor a command there is no user process to follow,
# so kernel allocations are traced instead.
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
# --older is given in milliseconds; timestamps are compared in ns.
min_age_ns = 1e6 * args.older
sample_every_n = args.sample_rate
num_prints = args.count
# Two frames more than requested are captured.
# NOTE(review): presumably to cover frames added by the probe itself --
# confirm against memleak.c.
max_stack_size = args.stack_depth + 2
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size

if min_size is not None and max_size is not None and min_size > max_size:
    print("min_size (-z) can't be greater than max_size (-Z)")
    # SystemExit does not depend on the `exit` helper that the site
    # module injects for interactive use.
    raise SystemExit(1)

if command is not None:
    print("Executing '%s' and tracing the resulting process." % command)
    pid = run_command_get_pid(command)

# Load the BPF program template. Look in the current directory first
# (the historical behaviour) and fall back to the directory holding this
# script, so the tool also works when started from somewhere else.
bpf_source_path = "memleak.c"
if not os.path.exists(bpf_source_path):
    bpf_source_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "memleak.c")
with open(bpf_source_path) as bpf_source_file:
    bpf_source = bpf_source_file.read()

# Fill in the textual placeholders of the template.
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
# The frame-grabbing statement is pasted max_stack_size times, i.e. the
# stack walk is emitted unrolled rather than as a loop.
bpf_source = bpf_source.replace("GRAB_ONE_FRAME", max_stack_size *
    "\tif (!(info->callstack[depth++] = get_frame(&bp))) return depth;\n")
bpf_source = bpf_source.replace("MAX_STACK_SIZE", str(max_stack_size))

# Optional early-return that drops allocations outside [min, max].
size_filter = ""
if min_size is not None and max_size is not None:
    size_filter = "if (size < %d || size > %d) return 0;" % \
                  (min_size, max_size)
elif min_size is not None:
    size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
    size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

bpf_program = BPF(text=bpf_source)

if not kernel_trace:
    print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
    bpf_program.attach_uprobe(name="c", sym="malloc",
                              fn_name="alloc_enter", pid=pid)
    bpf_program.attach_uretprobe(name="c", sym="malloc",
                                 fn_name="alloc_exit", pid=pid)
    bpf_program.attach_uprobe(name="c", sym="free",
                              fn_name="free_enter", pid=pid)
else:
    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")

decoder = StackDecoder(pid, bpf_program)

def print_outstanding():
    """Print the stacks that hold the most outstanding allocated bytes.

    Walks the "allocs" BPF table, skips allocations younger than
    min_age_ns, groups the rest by decoded call stack and prints the
    top_stacks largest groups in ascending order of total size. With
    --show-allocs every considered allocation is also listed
    individually, smallest first.
    """
    print("[%s] Top %d stacks with outstanding allocations:" %
          (datetime.now().strftime("%H:%M:%S"), top_stacks))
    # Read the clock once per report rather than once per allocation.
    newest_allowed_ns = Time.monotonic_time() - min_age_ns
    # stack text -> (number of allocations, total bytes)
    stacks = {}
    allocs = bpf_program.get_table("allocs")
    for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
        if info.timestamp_ns > newest_allowed_ns:
            continue
        stack = decoder.decode_stack(info, kernel_trace)
        count, total = stacks.get(stack, (0, 0))
        stacks[stack] = (count + 1, total + info.size)
        if args.show_allocs:
            print("\taddr = %x size = %s" %
                  (address.value, info.size))
    by_total_size = sorted(stacks.items(), key=lambda s: s[1][1])
    # A plain [-top_stacks:] would return *every* stack for top_stacks
    # == 0, because [-0:] is the whole list.
    to_show = by_total_size[-top_stacks:] if top_stacks > 0 else []
    for stack, (count, size) in to_show:
        print("\t%d bytes in %d allocations from stack\n\t\t%s" %
              (size, count, stack.replace(";", "\n\t\t")))

# Main loop: either stream raw trace messages, or print a report every
# `interval` seconds until `count` reports have been printed (forever
# when no count was given). Ctrl+C ends either mode quietly.
count_so_far = 0
try:
    while True:
        if trace_all:
            # Function-call form works on Python 2 and 3 alike and
            # matches every other print in this file.
            print(bpf_program.trace_fields())
        else:
            sleep(interval)
            # Pick up libraries mapped since the previous report.
            decoder.refresh_code_ranges()
            print_outstanding()
            count_so_far += 1
            if num_prints is not None and count_so_far >= num_prints:
                raise SystemExit()
except KeyboardInterrupt:
    raise SystemExit()