blob: 710005069221ceea0fe4cb3b98f44f2ea95d2d57 [file] [log] [blame]
#!/usr/bin/env python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
#                [-Z MAX_SIZE]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -080013
import argparse
import ctypes
import os
import shlex
import subprocess
from datetime import datetime
from time import sleep

from bcc import BPF
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -080021
class Time(object):
    """Read the monotonic clock so BPF timestamps can be compared in Python.

    The BPF program stamps allocations with bpf_ktime_get_ns(), which is
    documented as corresponding to clock_gettime(CLOCK_MONOTONIC). The same
    clock has to be read here: CLOCK_MONOTONIC_RAW is not rate-adjusted by
    NTP, so it gradually drifts away from CLOCK_MONOTONIC and would skew
    the computed allocation ages.

    Adapted from http://stackoverflow.com/a/1205762
    """

    # Clock ids, see <linux/time.h>.
    CLOCK_MONOTONIC = 1
    CLOCK_MONOTONIC_RAW = 4     # no longer used; kept for compatibility

    class timespec(ctypes.Structure):
        _fields_ = [
            ('tv_sec', ctypes.c_long),
            ('tv_nsec', ctypes.c_long)
        ]

    try:
        librt = ctypes.CDLL('librt.so.1', use_errno=True)
    except OSError:
        # No separate librt on this system: look clock_gettime up among the
        # symbols already loaded into the process instead.
        librt = ctypes.CDLL(None, use_errno=True)
    clock_gettime = librt.clock_gettime
    clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]

    @staticmethod
    def monotonic_time():
        """Return the current CLOCK_MONOTONIC reading in nanoseconds.

        The value is a float. Raises OSError (carrying errno) if the
        underlying clock_gettime call reports failure.
        """
        t = Time.timespec()
        if Time.clock_gettime(
                Time.CLOCK_MONOTONIC, ctypes.pointer(t)) != 0:
            errno_ = ctypes.get_errno()
            raise OSError(errno_, os.strerror(errno_))
        return t.tv_sec * 1e9 + t.tv_nsec
Sasha Goldshteina7cc6c22016-02-07 12:03:54 -080046
class StackDecoder(object):
    """Turn raw return addresses captured by the BPF program into text.

    Kernel addresses are resolved through the BPF object's ksym(). User
    addresses are resolved by locating the executable mapping in
    /proc/PID/maps and then looking the offset up in the output of
    `objdump -t` for that binary.
    """

    def __init__(self, pid, bpf):
        """Remember the target pid (-1 for kernel tracing) and BPF object."""
        self.pid = pid
        self.bpf = bpf
        # binary path -> {symbol name: (start, length)}, filled lazily.
        self.ranges_cache = {}
        # binary path -> (start, end) of its executable mapping.
        self.code_ranges = {}
        self.refresh_code_ranges()

    def refresh_code_ranges(self):
        """Re-read the executable mappings of the traced process.

        Does nothing when no process is being traced (pid == -1).
        """
        if self.pid == -1:
            return
        self.code_ranges = self._get_code_ranges()

    @staticmethod
    def _is_binary_segment(parts):
        """True for a maps line that is an executable, file-backed mapping."""
        return len(parts) == 6 and \
            parts[5][0] != '[' and 'x' in parts[1]

    @staticmethod
    def _is_shared_object(binary):
        """True for library paths, including versioned ones (libc.so.6)."""
        name = os.path.basename(binary)
        return name.endswith(".so") or ".so." in name

    def _get_code_ranges(self):
        """Parse /proc/PID/maps into {binary path: (start, end)}.

        A typical line from /proc/PID/maps looks like this:
            7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
        Only executable segments backed by a .so file or the main
        executable are kept. If a binary has several such segments, the
        last one listed wins.
        """
        ranges = {}
        with open("/proc/%d/maps" % self.pid) as maps:
            for raw_range in maps:
                parts = raw_range.split()
                if not StackDecoder._is_binary_segment(parts):
                    continue
                start, end = (int(p, 16) for p in parts[0].split('-'))
                ranges[parts[5]] = (start, end)
        return ranges

    @staticmethod
    def _is_function_symbol(parts):
        """True for an `objdump -t` line describing a function in .text."""
        return len(parts) == 6 and parts[3] == ".text" \
            and parts[2] == "F"

    def _get_sym_ranges(self, binary):
        """Return {symbol: (start, length)} for binary, running objdump once.

        A typical line from objdump -t looks like this:
            00000000004007f5 g F .text 000000000000010e main
        The first number is the start address and the second number is the
        length. Results are cached per binary.
        """
        if binary in self.ranges_cache:
            return self.ranges_cache[binary]
        sym_ranges = {}
        for raw_symbol in run_command_get_output("objdump -t %s" % binary):
            if not isinstance(raw_symbol, str):
                # Python 3 pipes yield bytes unless opened in text mode.
                raw_symbol = raw_symbol.decode("utf-8", "replace")
            parts = raw_symbol.split()
            if not StackDecoder._is_function_symbol(parts):
                continue
            sym_ranges[parts[5]] = (int(parts[0], 16), int(parts[4], 16))
        self.ranges_cache[binary] = sym_ranges
        return sym_ranges

    def _decode_sym(self, binary, offset):
        """Return "name+0xoff" for the symbol containing offset, else hex."""
        sym_ranges = self._get_sym_ranges(binary)
        for name, (start, length) in sym_ranges.items():
            # Half-open range so an address belongs to exactly one symbol;
            # a zero-length symbol still matches its own start address.
            if start <= offset < start + max(length, 1):
                return "%s+0x%x" % (name, offset - start)
        return "%x" % offset

    def _decode_addr(self, addr):
        """Return "symbol [binary]" for a user-space address, else hex.

        Uses the mappings cached by refresh_code_ranges(). For shared
        objects the address relative to the mapping start is looked up;
        for the main executable the absolute address is used.
        """
        for binary, (start, end) in self.code_ranges.items():
            # The end address of a mapping is exclusive.
            if start <= addr < end:
                if StackDecoder._is_shared_object(binary):
                    offset = addr - start
                else:
                    offset = addr
                return "%s [%s]" % (self._decode_sym(binary, offset),
                                    binary)
        return "%x" % addr

    def decode_stack(self, info, is_kernel_trace):
        """Format the first info.num_frames entries of info.callstack.

        Returns "???" when there are no frames; otherwise each frame is
        rendered as " <symbol> (<hex address>) ;" and the pieces are
        concatenated in order.
        """
        if info.num_frames <= 0:
            return "???"
        frames = []
        for i in range(info.num_frames):
            addr = info.callstack[i]
            if is_kernel_trace:
                frames.append(" %s [kernel] (%x) ;" %
                              (self.bpf.ksym(addr), addr))
            else:
                # At some point, we hope to have native BPF user-mode
                # symbol decoding, but for now we have to use our own.
                frames.append(" %s (%x) ;" %
                              (self._decode_addr(addr), addr))
        return "".join(frames)
Sasha Goldshtein29228612016-02-07 12:20:19 -0800147
def run_command_get_output(command):
    """Run command and return an iterator over its output lines.

    command is a single string, split into arguments with shell-like
    quoting rules (no shell is involved). stderr is merged into stdout.
    Lines are text and keep their trailing newline. The child is fully
    read and reaped before returning, so no zombie process or open pipe
    is left behind.
    """
    p = subprocess.Popen(shlex.split(command),
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                         universal_newlines=True)
    try:
        lines = p.stdout.readlines()
    finally:
        p.stdout.close()
        p.wait()
    return iter(lines)
Sasha Goldshtein29228612016-02-07 12:20:19 -0800152
def run_command_get_pid(command):
    """Start command in the background and return the child's PID.

    command is a single string, split into arguments with shell-like
    quoting rules so that quoted arguments containing spaces survive
    (no shell is involved). The child is not waited for here.
    """
    p = subprocess.Popen(shlex.split(command))
    return p.pid
Sasha Goldshtein751fce52016-02-08 02:57:02 -0800156
# Epilog shown at the end of --help.
examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual call to malloc/free
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with malloc/free and kernel-mode
allocations made with kmalloc/kfree.
"""


def _int_at_least(minimum):
    """Build an argparse `type` callable accepting integers >= minimum."""
    def parse(text):
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid int value: %r" % text)
        if value < minimum:
            raise argparse.ArgumentTypeError(
                "must be an integer >= %d (got %d)" % (minimum, value))
        return value
    return parse


# Out-of-range numbers are rejected up front: they would otherwise surface
# later as a BPF compile error, a busy loop, or a misleading report.
_positive_int = _int_at_least(1)
_non_negative_int = _int_at_least(0)

parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
# -1 is the "no PID given" sentinel, so --pid stays a plain int.
parser.add_argument(
    "-p", "--pid", type=int, default=-1,
    help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument(
    "-t", "--trace", action="store_true",
    help="print trace messages for each alloc/free call")
parser.add_argument(
    "interval", nargs="?", default=5, type=_positive_int,
    help="interval in seconds to print outstanding allocations")
parser.add_argument(
    "count", nargs="?", type=_positive_int,
    help="number of times to print the report before exiting")
parser.add_argument(
    "-a", "--show-allocs", default=False, action="store_true",
    help="show allocation addresses and sizes as well as call stacks")
parser.add_argument(
    "-o", "--older", default=500, type=_non_negative_int,
    help="prune allocations younger than this age in milliseconds")
parser.add_argument(
    "-c", "--command",
    help="execute and trace the specified command")
parser.add_argument(
    "-s", "--sample-rate", default=1, type=_positive_int,
    help="sample every N-th allocation to decrease the overhead")
parser.add_argument(
    "-d", "--stack-depth", default=10, type=_positive_int,
    help="maximum stack depth to capture")
parser.add_argument(
    "-T", "--top", type=_positive_int, default=10,
    help="display only this many top allocating stacks (by size)")
parser.add_argument(
    "-z", "--min-size", type=_non_negative_int,
    help="capture only allocations larger than this size")
parser.add_argument(
    "-Z", "--max-size", type=_non_negative_int,
    help="capture only allocations smaller than this size")
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800213
args = parser.parse_args()

# Copy the parsed options into the module-level names read by the rest of
# the script.
command = args.command
pid = args.pid
# With neither a PID nor a command to run, kernel allocations are traced.
kernel_trace = command is None and pid == -1
trace_all = args.trace
interval = args.interval
num_prints = args.count
# --older is given in milliseconds; timestamps are compared in nanoseconds.
min_age_ns = args.older * 1e6
sample_every_n = args.sample_rate
top_stacks = args.top
# Two slots beyond the requested depth: the BPF program subtracts 2 from
# the number of frames it grabbed.
max_stack_size = 2 + args.stack_depth
min_size, max_size = args.min_size, args.max_size

if None not in (min_size, max_size) and min_size > max_size:
    print("min_size (-z) can't be greater than max_size (-Z)")
    exit(1)

if command is not None:
    # Launch the target ourselves and trace the resulting process.
    print("Executing '%s' and tracing the resulting process." % command)
    pid = run_command_get_pid(command)
Sasha Goldshtein29228612016-02-07 12:20:19 -0800236
# C text of the BPF program. The upper-case placeholders (SHOULD_PRINT,
# SAMPLE_EVERY_N, GRAB_ONE_FRAME, MAX_STACK_SIZE, SIZE_FILTER) are filled in
# from the command-line options further down.
bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int num_frames;
        u64 callstack[MAX_STACK_SIZE];
};

BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t);

// Adapted from https://github.com/iovisor/bcc/tools/offcputime.py
static u64 get_frame(u64 *bp) {
        if (*bp) {
                // The following stack walker is x86_64 specific
                u64 ret = 0;
                if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
                        return 0;
                if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
                        *bp = 0;
                return ret;
        }
        return 0;
}
static int grab_stack(struct pt_regs *ctx, struct alloc_info_t *info)
{
        int depth = 0;
        u64 bp = ctx->bp;
        GRAB_ONE_FRAME
        return depth;
}

int alloc_enter(struct pt_regs *ctx, size_t size)
{
        SIZE_FILTER
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

int alloc_exit(struct pt_regs *ctx)
{
        u64 address = ctx->ax;
        u64 pid = bpf_get_current_pid_tgid();
        u64* size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

        info.timestamp_ns = bpf_ktime_get_ns();
        info.num_frames = grab_stack(ctx, &info) - 2;
        allocs.update(&address, &info);

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\\n",
                                 info.size, address, info.num_frames);
        }
        return 0;
}

int free_enter(struct pt_regs *ctx, void *address)
{
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}
"""

# Optional early return in alloc_enter for sizes outside [min_size, max_size].
_size_checks = []
if min_size is not None:
    _size_checks.append("size < %d" % min_size)
if max_size is not None:
    _size_checks.append("size > %d" % max_size)
size_filter = ""
if _size_checks:
    size_filter = "if (%s) return 0;" % " || ".join(_size_checks)

# The frame-grabbing statement is emitted once per stack slot, so the walk
# is fully unrolled in the generated C text.
_one_frame = \
    "\tif (!(info->callstack[depth++] = get_frame(&bp))) return depth;\n"

for _placeholder, _replacement in (
        ("SHOULD_PRINT", "1" if trace_all else "0"),
        ("SAMPLE_EVERY_N", str(sample_every_n)),
        ("GRAB_ONE_FRAME", max_stack_size * _one_frame),
        ("MAX_STACK_SIZE", str(max_stack_size)),
        ("SIZE_FILTER", size_filter)):
    bpf_source = bpf_source.replace(_placeholder, _replacement)
344
# Compile and load the generated program.
bpf_program = BPF(text=bpf_source)

# The same three handlers serve both modes; only the probed functions differ.
if kernel_trace:
    print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
    bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
    bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
    bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
else:
    print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
    bpf_program.attach_uprobe(
        name="c", sym="malloc", fn_name="alloc_enter", pid=pid)
    bpf_program.attach_uretprobe(
        name="c", sym="malloc", fn_name="alloc_exit", pid=pid)
    bpf_program.attach_uprobe(
        name="c", sym="free", fn_name="free_enter", pid=pid)

# Symbolizer used when printing the captured call stacks.
decoder = StackDecoder(pid, bpf_program)
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800362
def print_outstanding():
    """Print the call stacks holding the most outstanding allocated bytes.

    Reads the "allocs" table of the BPF program, skips allocations younger
    than min_age_ns, groups the rest by decoded call stack and prints the
    top_stacks groups with the largest total size, smallest first. With
    --show-allocs every considered allocation is also listed, in ascending
    size order.
    """
    print("[%s] Top %d stacks with outstanding allocations:" %
          (datetime.now().strftime("%H:%M:%S"), top_stacks))
    allocs = bpf_program.get_table("allocs")
    # Read the clock once so every allocation is judged against the same
    # cutoff, instead of calling into libc for each table entry.
    newest_allowed_ns = Time.monotonic_time() - min_age_ns
    stacks = {}     # decoded stack -> (allocation count, total bytes)
    for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
        if newest_allowed_ns < info.timestamp_ns:
            continue
        stack = decoder.decode_stack(info, kernel_trace)
        count, size = stacks.get(stack, (0, 0))
        stacks[stack] = (count + 1, size + info.size)
        if args.show_allocs:
            print("\taddr = %x size = %s" %
                  (address.value, info.size))
    ranked = sorted(stacks.items(), key=lambda s: s[1][1])
    # ranked[-0:] would be the whole list, so guard the "show none" case.
    to_show = ranked[-top_stacks:] if top_stacks > 0 else []
    for stack, (count, size) in to_show:
        print("\t%d bytes in %d allocations from stack\n\t\t%s" %
              (size, count, stack.replace(";", "\n\t\t")))
Sasha Goldshtein4f1ea672016-02-07 01:57:42 -0800384
# Main loop. In trace mode, echo each message emitted by the BPF program;
# otherwise print a report every `interval` seconds, stopping after
# `num_prints` reports when a count was given.
count_so_far = 0
try:
    while True:
        if trace_all:
            # Call form so the line parses on both Python 2 and Python 3.
            print(bpf_program.trace_fields())
        else:
            sleep(interval)
            # Pick up libraries mapped since the last report.
            decoder.refresh_code_ranges()
            print_outstanding()
            count_so_far += 1
            if num_prints is not None and count_so_far >= num_prints:
                exit()
except KeyboardInterrupt:
    # Ctrl+C is the advertised way to quit, wherever it arrives: while
    # sleeping, while reading trace output or in the middle of a report.
    exit()