blob: 84f363ea2080e4ecda918b61f7a50703528714d0 [file] [log] [blame]
Brendan Gregg38cef482016-01-15 17:26:30 -08001#!/usr/bin/python
2#
3# offcputime Summarize off-CPU time by kernel stack trace
4# For Linux, uses BCC, eBPF.
5#
Brendan Gregg62f4c282016-01-30 11:05:40 -08006# USAGE: offcputime [-h] [-u] [-p PID] [-v] [-f] [duration]
Brendan Gregg38cef482016-01-15 17:26:30 -08007#
8# The current implementation uses an unrolled loop for x86_64, and was written
9# as a proof of concept. This implementation should be replaced in the future
10# with an appropriate bpf_ call, when available.
11#
Brendan Gregg38cef482016-01-15 17:26:30 -080012# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 13-Jan-2016 Brendan Gregg Created this.
16
17from __future__ import print_function
18from bcc import BPF
19from time import sleep, strftime
20import argparse
21import signal
22
23# arguments
24examples = """examples:
25 ./offcputime # trace off-CPU stack time until Ctrl-C
26 ./offcputime 5 # trace for 5 seconds only
27 ./offcputime -f 5 # 5 seconds, and output in folded format
Brendan Greggd364d042016-01-19 17:12:52 -080028 ./offcputime -u # don't include kernel threads (user only)
Brendan Gregg38cef482016-01-15 17:26:30 -080029 ./offcputime -p 185 # trace fo PID 185 only
30"""
31parser = argparse.ArgumentParser(
32 description="Summarize off-CPU time by kernel stack trace",
33 formatter_class=argparse.RawDescriptionHelpFormatter,
34 epilog=examples)
Brendan Greggd364d042016-01-19 17:12:52 -080035parser.add_argument("-u", "--useronly", action="store_true",
36 help="user threads only (no kernel threads)")
Brendan Gregg38cef482016-01-15 17:26:30 -080037parser.add_argument("-p", "--pid",
38 help="trace this PID only")
39parser.add_argument("-v", "--verbose", action="store_true",
40 help="show raw addresses")
41parser.add_argument("-f", "--folded", action="store_true",
42 help="output folded format")
43parser.add_argument("duration", nargs="?", default=99999999,
44 help="duration of trace, in seconds")
45args = parser.parse_args()
46folded = args.folded
47duration = int(args.duration)
48debug = 0
Brendan Greggd364d042016-01-19 17:12:52 -080049if args.pid and args.useronly:
50 print("ERROR: use either -p or -u.")
51 exit()
Brendan Gregg38cef482016-01-15 17:26:30 -080052
53# signal handler
54def signal_ignore(signal, frame):
55 print()
56
Brendan Greggd364d042016-01-19 17:12:52 -080057# define BPF program
Brendan Gregg38cef482016-01-15 17:26:30 -080058bpf_text = """
59#include <uapi/linux/ptrace.h>
60#include <linux/sched.h>
61
Brendan Gregg38cef482016-01-15 17:26:30 -080062#define MINBLOCK_US 1
63
64struct key_t {
65 char name[TASK_COMM_LEN];
Vicent Martie82fb1b2016-03-25 17:21:44 +010066 int stack_id;
Brendan Gregg38cef482016-01-15 17:26:30 -080067};
68BPF_HASH(counts, struct key_t);
69BPF_HASH(start, u32);
Vicent Martie82fb1b2016-03-25 17:21:44 +010070BPF_STACK_TRACE(stack_traces, 1024)
Brendan Gregg38cef482016-01-15 17:26:30 -080071
Brendan Greggd364d042016-01-19 17:12:52 -080072int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
73 u32 pid;
74 u64 ts, *tsp;
Brendan Gregg38cef482016-01-15 17:26:30 -080075
Brendan Greggd364d042016-01-19 17:12:52 -080076 // record previous thread sleep time
77 if (FILTER) {
78 pid = prev->pid;
79 ts = bpf_ktime_get_ns();
80 start.update(&pid, &ts);
81 }
Brendan Gregg38cef482016-01-15 17:26:30 -080082
Brendan Greggd364d042016-01-19 17:12:52 -080083 // calculate current thread's delta time
84 pid = bpf_get_current_pid_tgid();
Brendan Gregg38cef482016-01-15 17:26:30 -080085 tsp = start.lookup(&pid);
86 if (tsp == 0)
Brendan Greggf7471142016-01-19 14:40:41 -080087 return 0; // missed start or filtered
88 u64 delta = bpf_ktime_get_ns() - *tsp;
Brendan Gregg38cef482016-01-15 17:26:30 -080089 start.delete(&pid);
90 delta = delta / 1000;
91 if (delta < MINBLOCK_US)
92 return 0;
93
Brendan Greggf7471142016-01-19 14:40:41 -080094 // create map key
Vicent Martie82fb1b2016-03-25 17:21:44 +010095 u64 zero = 0, *val;
Brendan Greggf7471142016-01-19 14:40:41 -080096 struct key_t key = {};
Vicent Martie82fb1b2016-03-25 17:21:44 +010097
Brendan Gregg38cef482016-01-15 17:26:30 -080098 bpf_get_current_comm(&key.name, sizeof(key.name));
Vicent Martie82fb1b2016-03-25 17:21:44 +010099 key.stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID);
Brendan Gregg38cef482016-01-15 17:26:30 -0800100
Brendan Gregg38cef482016-01-15 17:26:30 -0800101 val = counts.lookup_or_init(&key, &zero);
102 (*val) += delta;
103 return 0;
104}
105"""
106if args.pid:
Brendan Greggd364d042016-01-19 17:12:52 -0800107 filter = 'pid == %s' % args.pid
108elif args.useronly:
109 filter = '!(prev->flags & PF_KTHREAD)'
Brendan Gregg38cef482016-01-15 17:26:30 -0800110else:
Brendan Greggd364d042016-01-19 17:12:52 -0800111 filter = '1'
112bpf_text = bpf_text.replace('FILTER', filter)
Brendan Gregg38cef482016-01-15 17:26:30 -0800113if debug:
114 print(bpf_text)
Brendan Greggd364d042016-01-19 17:12:52 -0800115
116# initialize BPF
Brendan Gregg38cef482016-01-15 17:26:30 -0800117b = BPF(text=bpf_text)
Brendan Gregg38cef482016-01-15 17:26:30 -0800118b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
119matched = b.num_open_kprobes()
120if matched == 0:
121 print("0 functions traced. Exiting.")
122 exit()
123
124# header
125if not folded:
126 print("Tracing off-CPU time (us) by kernel stack", end="")
127 if duration < 99999999:
128 print(" for %d secs." % duration)
129 else:
130 print("... Hit Ctrl-C to end.")
131
132# output
133while (1):
134 try:
135 sleep(duration)
136 except KeyboardInterrupt:
137 # as cleanup can take many seconds, trap Ctrl-C:
138 signal.signal(signal.SIGINT, signal_ignore)
139
140 if not folded:
141 print()
142 counts = b.get_table("counts")
Vicent Martie82fb1b2016-03-25 17:21:44 +0100143 stack_traces = b.get_table("stack_traces")
Brendan Gregg38cef482016-01-15 17:26:30 -0800144 for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
145 if folded:
146 # print folded stack output
Vicent Martie82fb1b2016-03-25 17:21:44 +0100147 stack = list(stack_traces.walk(k.stack_id))[1:]
148 line = [k.name.decode()] + [b.ksym(addr) for addr in reversed(stack)]
149 print("%s %d" % (";".join(line), v.value))
Brendan Gregg38cef482016-01-15 17:26:30 -0800150 else:
151 # print default multi-line stack output
Vicent Martie82fb1b2016-03-25 17:21:44 +0100152 for addr in stack_traces.walk(k.stack_id):
153 print(" %-16x %s" % (addr, b.ksym(addr)))
Brendan Gregg38cef482016-01-15 17:26:30 -0800154 print(" %-16s %s" % ("-", k.name))
155 print(" %d\n" % v.value)
156 counts.clear()
157
158 if not folded:
159 print("Detaching...")
160 exit()