blob: 6d15b622d2f25862af11e696893161c20bca7137 [file] [log] [blame]
Alexey Ivanovcc01a9c2019-01-16 09:50:46 -08001#!/usr/bin/python
Joel0b813f82018-06-18 19:37:38 -07002# @lint-avoid-python-3-compatibility-imports
3#
4# criticalstat Trace long critical sections (IRQs or preemption disabled)
5# For Linux, uses BCC, eBPF. Requires kernel built with
6# CONFIG_DEBUG_PREEMPT and CONFIG_PREEMPTIRQ_EVENTS
7#
8# USAGE: criticalstat [-h] [-p] [-i] [-d DURATION]
9#
10# Copyright (c) 2018, Google, Inc.
11# Licensed under the Apache License, Version 2.0 (the "License")
12#
13# By Joel Fernandes <joel@joelfernandes.org>
14
15from __future__ import print_function
16from bcc import BPF
17import argparse
Joel0b813f82018-06-18 19:37:38 -070018import sys
19import subprocess
20import os.path
21
examples = ""

parser = argparse.ArgumentParser(
    description="Trace long critical sections",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)

parser.add_argument("-p", "--preemptoff", action="store_true",
    help="Find long sections where preemption was off")

parser.add_argument("-i", "--irqoff", action="store_true",
    help="Find long sections where IRQ was off")

# type=int makes argparse reject a non-numeric duration up front with a
# usage error, instead of crashing later with a bare ValueError when the
# value is converted for the BPF program.
parser.add_argument("-d", "--duration", type=int, default=100,
    help="Duration in uS (microseconds) below which we filter")

args = parser.parse_args()

# Mode selection: default is IRQ-off tracing; -i takes precedence over -p
# when both flags are given.
preemptoff = False
irqoff = False

if args.irqoff:
    preemptoff = False
    irqoff = True
elif args.preemptoff:
    preemptoff = True
    irqoff = False
49
def _find_debugfs():
    """Return the debugfs mount point parsed from /proc/mounts, or None.

    Replaces the previous shell pipeline (cat | grep | awk, shell=True)
    with a direct read of /proc/mounts.
    """
    try:
        with open("/proc/mounts") as mounts:
            for line in mounts:
                # /proc/mounts fields: device mountpoint fstype options dump pass
                fields = line.split()
                if len(fields) >= 3 and fields[2] == "debugfs":
                    return fields[1]
    except IOError:
        pass
    return None

debugfs_path = _find_debugfs()

# BUG FIX: the old code read the mount point as bytes and compared it to the
# str "" — that comparison is always False under Python 3, so the
# missing-debugfs error path could never trigger.
if debugfs_path is None:
    print("ERROR: Unable to find debugfs mount point")
    sys.exit(0)

trace_path = debugfs_path + "/tracing/events/preemptirq/"

# All four preempt/irq tracepoints must exist for the probes installed below.
if (not os.path.exists(trace_path + "irq_disable") or
        not os.path.exists(trace_path + "irq_enable") or
        not os.path.exists(trace_path + "preempt_disable") or
        not os.path.exists(trace_path + "preempt_enable")):
    print("ERROR: required tracing events are not available\n" +
          "Make sure the kernel is built with CONFIG_DEBUG_PREEMPT " +
          "CONFIG_PREEMPT_TRACER " +
          "and CONFIG_PREEMPTIRQ_EVENTS (CONFIG_PREEMPTIRQ_TRACEPOINTS in "
          "kernel 4.19 and later) enabled. Also please disable " +
          "CONFIG_PROVE_LOCKING and CONFIG_LOCKDEP on older kernels.")
    sys.exit(0)
72
# BPF program source (C). Two placeholders are substituted before loading:
#   TYPE     -> 'preempt' or 'irq', selecting the preemptirq:TYPE_disable /
#               preemptirq:TYPE_enable tracepoint pair to probe
#   DURATION -> threshold in nanoseconds; shorter critical sections are dropped
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

enum addr_offs {
    START_CALLER_OFF,
    START_PARENT_OFF,
    END_CALLER_OFF,
    END_PARENT_OFF
};

struct start_data {
    u32 addr_offs[2];
    u64 ts;
    int idle_skip;
    int active;
};

struct data_t {
    u64 time;
    s64 stack_id;
    u32 cpu;
    u64 id;
    u32 addrs[4];   /* indexed by addr_offs */
    char comm[TASK_COMM_LEN];
};

BPF_STACK_TRACE(stack_traces, 16384);
BPF_PERCPU_ARRAY(sts, struct start_data, 1);
BPF_PERCPU_ARRAY(isidle, u64, 1);
BPF_PERF_OUTPUT(events);

/*
 * In the below code we install tracepoint probes on preempt or
 * IRQ disable/enable critical sections and idle events, the cases
 * are combinations of 4 different states.
 * The states are defined as:
 * CSenter: A critical section has been entered. Either due to
 *          preempt disable or irq disable.
 * CSexit: A critical section has been exited. Either due to
 *         preempt enable or irq enable.
 * Ienter: The CPU has entered an idle state.
 * Iexit: The CPU has exited an idle state.
 *
 * The scenario we are trying to detect is if there is an overlap
 * between Critical sections and idle entry/exit. If there are any
 * such cases, we avoid recording those critical sections since they
 * are not worth while to record and just add noise.
 */
TRACEPOINT_PROBE(power, cpu_idle)
{
    int idx = 0;
    u64 val;
    struct start_data *stdp, std;

    // Mark active sections as that they should be skipped

    // Handle the case CSenter, Ienter, CSexit, Iexit
    // Handle the case CSenter, Ienter, Iexit, CSexit
    stdp = sts.lookup(&idx);
    if (stdp && stdp->active) {
        /*
         * Due to verifier issues, we have to copy contents
         * of stdp onto the stack before the update.
         * Fix it to directly update once kernel patch d71962f
         * becomes more widespread.
         */
        std = *stdp;
        std.idle_skip = 1;
        sts.update(&idx, &std);
    }

    // Mark CPU as actively within idle or not.
    if (args->state < 100) {
        val = 1;
        isidle.update(&idx, &val);
    } else {
        val = 0;
        isidle.update(&idx, &val);
    }
    return 0;
}

static int in_idle(void)
{
     u64 *idlep;
     int idx = 0;

    // Skip event if we're in idle loop
    idlep = isidle.lookup(&idx);
    if (idlep && *idlep)
        return 1;
    return 0;
}

static void reset_state(void)
{
    int idx = 0;
    struct start_data s = {};

    sts.update(&idx, &s);
}

TRACEPOINT_PROBE(preemptirq, TYPE_disable)
{
    int idx = 0;
    struct start_data s;

    // Handle the case Ienter, CSenter, CSexit, Iexit
    // Handle the case Ienter, CSenter, Iexit, CSexit
    if (in_idle()) {
        reset_state();
        return 0;
    }

    u64 ts = bpf_ktime_get_ns();

    s.idle_skip = 0;
    s.addr_offs[START_CALLER_OFF] = args->caller_offs;
    s.addr_offs[START_PARENT_OFF] = args->parent_offs;
    s.ts = ts;
    s.active = 1;

    sts.update(&idx, &s);
    return 0;
}

TRACEPOINT_PROBE(preemptirq, TYPE_enable)
{
    int idx = 0;
    u64 start_ts, end_ts, diff;
    struct start_data *stdp;

    // Handle the case CSenter, Ienter, CSexit, Iexit
    // Handle the case Ienter, CSenter, CSexit, Iexit
    if (in_idle()) {
        reset_state();
        return 0;
    }

    stdp = sts.lookup(&idx);
    if (!stdp) {
        reset_state();
        return 0;
    }

    // Handle the case Ienter, Csenter, Iexit, Csexit
    if (!stdp->active) {
        reset_state();
        return 0;
    }

    // Handle the case CSenter, Ienter, Iexit, CSexit
    if (stdp->idle_skip) {
        reset_state();
        return 0;
    }

    end_ts = bpf_ktime_get_ns();
    start_ts = stdp->ts;

    if (start_ts > end_ts) {
        reset_state();
        return 0;
    }

    diff = end_ts - start_ts;

    if (diff < DURATION) {
        reset_state();
        return 0;
    }

    u64 id = bpf_get_current_pid_tgid();
    struct data_t data = {};

    if (bpf_get_current_comm(&data.comm, sizeof(data.comm)) == 0) {
        data.addrs[START_CALLER_OFF] = stdp->addr_offs[START_CALLER_OFF];
        data.addrs[START_PARENT_OFF] = stdp->addr_offs[START_PARENT_OFF];
        data.addrs[END_CALLER_OFF] = args->caller_offs;
        data.addrs[END_PARENT_OFF] = args->parent_offs;

        data.id = id;
        data.stack_id = stack_traces.get_stackid(args, 0);
        data.time = diff;
        data.cpu = bpf_get_smp_processor_id();
        events.perf_submit(args, &data, sizeof(data));
    }

    reset_state();
    return 0;
}
"""
# Splice the user-supplied threshold (converted from microseconds to
# nanoseconds) and the selected tracepoint family into the BPF source,
# then compile and load it.
duration_ns = int(args.duration) * 1000
tracepoint_type = 'preempt' if preemptoff else 'irq'

bpf_text = bpf_text.replace('DURATION', str(duration_ns))
bpf_text = bpf_text.replace('TYPE', tracepoint_type)

b = BPF(text=bpf_text)
274
Joel0b813f82018-06-18 19:37:38 -0700275def get_syms(kstack):
276 syms = []
277
278 for addr in kstack:
279 s = b.ksym(addr, show_offset=True)
280 syms.append(s)
281
282 return syms
283
# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: format and print one critical-section event.

    Prints the task, total disabled time, the start/end call sites (section
    addresses are stored as offsets from _stext and resolved here), and the
    kernel stack captured at section exit.
    """
    try:
        global b
        event = b["events"].event(data)
        stack_traces = b['stack_traces']
        stext = b.ksymname('_stext')

        print("===================================")
        # event.id comes from bpf_get_current_pid_tgid(): upper 32 bits are
        # the tgid (printed as pid), lower 32 bits the thread id.
        print("TASK: %s (pid %5d tid %5d) Total Time: %-9.3fus\n\n" % (event.comm, \
            (event.id >> 32), (event.id & 0xffffffff), float(event.time) / 1000), end="")
        print("Section start: {} -> {}".format(b.ksym(stext + event.addrs[0]), b.ksym(stext + event.addrs[1])))
        print("Section end: {} -> {}".format(b.ksym(stext + event.addrs[2]), b.ksym(stext + event.addrs[3])))

        # A negative stack_id means get_stackid failed (e.g. hash collision
        # in the stack trace table), so there is nothing to walk.
        if event.stack_id >= 0:
            kstack = stack_traces.walk(event.stack_id)
            syms = get_syms(kstack)
            if not syms:
                return

            for s in syms:
                print("  ", end="")
                print("%s" % s)
        else:
            print("NO STACK FOUND DUE TO COLLISION")
        print("===================================")
        print("")
    # NOTE(review): this broad except exits the whole program silently on
    # any lookup/format error (presumably also to stop cleanly on Ctrl-C
    # during a print) — consider logging the exception before exiting.
    except Exception:
        sys.exit(0)
313
# Attach the formatter to the perf ring buffer and poll until interrupted.
b["events"].open_perf_buffer(print_event, page_cnt=256)

mode = 'preempt' if preemptoff else 'IRQ'
print("Finding critical section with {} disabled for > {}us".format(
    mode, args.duration))

while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()