blob: b8cafd97bf0c3089a5a04e360da5e07c39d09397 [file] [log] [blame]
Jiri Olsaac00ac52019-11-15 12:45:59 +01001#!/usr/bin/python
2#
Yonghong Song71f9c2a2019-12-04 19:45:27 -08003# klockstat traces lock events and display locks statistics.
Jiri Olsaac00ac52019-11-15 12:45:59 +01004#
Yonghong Song71f9c2a2019-12-04 19:45:27 -08005# USAGE: klockstat
Jiri Olsaac00ac52019-11-15 12:45:59 +01006#
7
8from __future__ import print_function
9from bcc import BPF, USDT
10import argparse
11import subprocess
12import ctypes as ct
13from time import sleep, strftime
14from datetime import datetime, timedelta
15import errno
16from sys import stderr
17
18examples = """
Yonghong Song71f9c2a2019-12-04 19:45:27 -080019 klockstat # trace system wide
20 klockstat -d 5 # trace for 5 seconds only
21 klockstat -i 5 # display stats every 5 seconds
22 klockstat -p 123 # trace locks for PID 123
23 klockstat -t 321 # trace locks for PID 321
24 klockstat -c pipe_ # display stats only for lock callers with 'pipe_' substring
25 klockstat -S acq_count # sort lock acquired results on acquired count
26 klockstat -S hld_total # sort lock held results on total held time
27 klockstat -S acq_count,hld_total # combination of above
28 klockstat -n 3 # display 3 locks
29 klockstat -s 3 # display 3 levels of stack
Jiri Olsaac00ac52019-11-15 12:45:59 +010030"""
31
32# arg validation
33def positive_int(val):
34 try:
35 ival = int(val)
36 except ValueError:
37 raise argparse.ArgumentTypeError("must be an integer")
38
39 if ival < 0:
40 raise argparse.ArgumentTypeError("must be positive")
41 return ival
42
43def positive_nonzero_int(val):
44 ival = positive_int(val)
45 if ival == 0:
46 raise argparse.ArgumentTypeError("must be nonzero")
47 return ival
48
49def stack_id_err(stack_id):
Michael Prokopc14d02a2020-01-09 02:29:18 +010050 # -EFAULT in get_stackid normally means the stack-trace is not available,
Jiri Olsaac00ac52019-11-15 12:45:59 +010051 # Such as getting kernel stack trace in userspace code
52 return (stack_id < 0) and (stack_id != -errno.EFAULT)
53
54parser = argparse.ArgumentParser(
55 description="",
56 formatter_class=argparse.RawDescriptionHelpFormatter,
57 epilog=examples)
58
59time_group = parser.add_mutually_exclusive_group()
60time_group.add_argument("-d", "--duration", type=int,
61 help="total duration of trace in seconds")
62time_group.add_argument("-i", "--interval", type=int,
63 help="print summary at this interval (seconds)")
Jerome Marchand10603c72020-06-11 10:08:26 +020064parser.add_argument("-n", "--locks", type=int, default=99999999,
Jiri Olsaac00ac52019-11-15 12:45:59 +010065 help="print given number of locks")
66parser.add_argument("-s", "--stacks", type=int, default=1,
67 help="print given number of stack entries")
68parser.add_argument("-c", "--caller",
69 help="print locks taken by given caller")
70parser.add_argument("-S", "--sort",
71 help="sort data on <aq_field,hd_field>, fields: acq_[max|total|count] hld_[max|total|count]")
72parser.add_argument("-p", "--pid",
73 help="trace this PID only")
74parser.add_argument("-t", "--tid",
75 help="trace this TID only")
76parser.add_argument("--stack-storage-size", default=16384,
77 type=positive_nonzero_int,
78 help="the number of unique stack traces that can be stored and "
79 "displayed (default 16384)")
80
81args = parser.parse_args()
82
83program = """
84#include <uapi/linux/ptrace.h>
85
86struct depth_id {
87 u64 id;
88 u64 depth;
89};
90
91BPF_ARRAY(enabled, u64, 1);
92BPF_HASH(track, u64, u64);
93BPF_HASH(time_aq, u64, u64);
94BPF_HASH(lock_depth, u64, u64);
95BPF_HASH(time_held, struct depth_id, u64);
96BPF_HASH(stack, struct depth_id, int);
97
98BPF_HASH(aq_report_count, int, u64);
99BPF_HASH(aq_report_max, int, u64);
100BPF_HASH(aq_report_total, int, u64);
101
102BPF_HASH(hl_report_count, int, u64);
103BPF_HASH(hl_report_max, int, u64);
104BPF_HASH(hl_report_total, int, u64);
105
106BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
107
108static bool is_enabled(void)
109{
110 int key = 0;
111 u64 *ret;
112
113 ret = enabled.lookup(&key);
114 return ret && *ret == 1;
115}
116
117static bool allow_pid(u64 id)
118{
119 u32 pid = id >> 32; // PID is higher part
120 u32 tid = id; // Cast and get the lower part
121
122 FILTER
123
124 return 1;
125}
126
Jiri Olsada7cac72019-12-23 23:08:27 +0100127static int do_mutex_lock_enter(void *ctx, int skip)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100128{
129 if (!is_enabled())
130 return 0;
131
132 u64 id = bpf_get_current_pid_tgid();
133
134 if (!allow_pid(id))
135 return 0;
136
137 u64 one = 1, zero = 0;
138
139 track.update(&id, &one);
140
141 u64 *depth = lock_depth.lookup(&id);
142
143 if (!depth) {
144 lock_depth.update(&id, &zero);
145
146 depth = lock_depth.lookup(&id);
147 /* something is wrong.. */
148 if (!depth)
149 return 0;
150 }
151
Jiri Olsada7cac72019-12-23 23:08:27 +0100152 int stackid = stack_traces.get_stackid(ctx, skip);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100153 struct depth_id did = {
154 .id = id,
155 .depth = *depth,
156 };
157 stack.update(&did, &stackid);
158
159 u64 ts = bpf_ktime_get_ns();
160 time_aq.update(&id, &ts);
161
162 *depth += 1;
163 return 0;
164}
165
166static void update_aq_report_count(int *stackid)
167{
168 u64 *count, one = 1;
169
170 count = aq_report_count.lookup(stackid);
171 if (!count) {
172 aq_report_count.update(stackid, &one);
173 } else {
174 *count += 1;
175 }
176}
177
178static void update_hl_report_count(int *stackid)
179{
180 u64 *count, one = 1;
181
182 count = hl_report_count.lookup(stackid);
183 if (!count) {
184 hl_report_count.update(stackid, &one);
185 } else {
186 *count += 1;
187 }
188}
189
190static void update_aq_report_max(int *stackid, u64 time)
191{
192 u64 *max;
193
194 max = aq_report_max.lookup(stackid);
195 if (!max || *max < time)
196 aq_report_max.update(stackid, &time);
197}
198
199static void update_hl_report_max(int *stackid, u64 time)
200{
201 u64 *max;
202
203 max = hl_report_max.lookup(stackid);
204 if (!max || *max < time)
205 hl_report_max.update(stackid, &time);
206}
207
208static void update_aq_report_total(int *stackid, u64 delta)
209{
210 u64 *count, *time;
211
212 count = aq_report_count.lookup(stackid);
213 if (!count)
214 return;
215
216 time = aq_report_total.lookup(stackid);
217 if (!time) {
218 aq_report_total.update(stackid, &delta);
219 } else {
220 *time = *time + delta;
221 }
222}
223
224static void update_hl_report_total(int *stackid, u64 delta)
225{
226 u64 *count, *time;
227
228 count = hl_report_count.lookup(stackid);
229 if (!count)
230 return;
231
232 time = hl_report_total.lookup(stackid);
233 if (!time) {
234 hl_report_total.update(stackid, &delta);
235 } else {
236 *time = *time + delta;
237 }
238}
239
Jiri Olsada7cac72019-12-23 23:08:27 +0100240static int do_mutex_lock_return(void)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100241{
242 if (!is_enabled())
243 return 0;
244
245 u64 id = bpf_get_current_pid_tgid();
246
247 if (!allow_pid(id))
248 return 0;
249
250 u64 *one = track.lookup(&id);
251
252 if (!one)
253 return 0;
254
255 track.delete(&id);
256
257 u64 *depth = lock_depth.lookup(&id);
258 if (!depth)
259 return 0;
260
261 struct depth_id did = {
262 .id = id,
263 .depth = *depth - 1,
264 };
265
266 u64 *aq = time_aq.lookup(&id);
267 if (!aq)
268 return 0;
269
270 int *stackid = stack.lookup(&did);
271 if (!stackid)
272 return 0;
273
uppinggoerea8bbbf2019-11-29 15:33:44 +0800274 int stackid_ = *stackid;
Jiri Olsaac00ac52019-11-15 12:45:59 +0100275 u64 cur = bpf_ktime_get_ns();
276
277 if (cur > *aq) {
278 int val = cur - *aq;
uppinggoerea8bbbf2019-11-29 15:33:44 +0800279 update_aq_report_count(&stackid_);
280 update_aq_report_max(&stackid_, val);
281 update_aq_report_total(&stackid_, val);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100282 }
283
284 time_held.update(&did, &cur);
285 return 0;
286}
287
Jiri Olsada7cac72019-12-23 23:08:27 +0100288static int do_mutex_unlock_enter(void)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100289{
290 if (!is_enabled())
291 return 0;
292
293 u64 id = bpf_get_current_pid_tgid();
294
295 if (!allow_pid(id))
296 return 0;
297
298 u64 *depth = lock_depth.lookup(&id);
299
300 if (!depth || *depth == 0)
301 return 0;
302
303 *depth -= 1;
304
305 struct depth_id did = {
306 .id = id,
307 .depth = *depth,
308 };
309
310 u64 *held = time_held.lookup(&did);
311 if (!held)
312 return 0;
313
314 int *stackid = stack.lookup(&did);
315 if (!stackid)
316 return 0;
317
uppinggoerea8bbbf2019-11-29 15:33:44 +0800318
319 int stackid_ = *stackid;
Jiri Olsaac00ac52019-11-15 12:45:59 +0100320 u64 cur = bpf_ktime_get_ns();
321
322 if (cur > *held) {
323 u64 val = cur - *held;
uppinggoerea8bbbf2019-11-29 15:33:44 +0800324 update_hl_report_count(&stackid_);
325 update_hl_report_max(&stackid_, val);
326 update_hl_report_total(&stackid_, val);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100327 }
328
329 stack.delete(&did);
330 time_held.delete(&did);
331 return 0;
332}
Jiri Olsada7cac72019-12-23 23:08:27 +0100333"""
334
335program_kprobe = """
336int mutex_unlock_enter(struct pt_regs *ctx)
337{
338 return do_mutex_unlock_enter();
339}
340
341int mutex_lock_return(struct pt_regs *ctx)
342{
343 return do_mutex_lock_return();
344}
345
346int mutex_lock_enter(struct pt_regs *ctx)
347{
348 return do_mutex_lock_enter(ctx, 0);
349}
350"""
351
352program_kfunc = """
353KFUNC_PROBE(mutex_unlock, void *lock)
354{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500355 return do_mutex_unlock_enter();
Jiri Olsada7cac72019-12-23 23:08:27 +0100356}
357
358KRETFUNC_PROBE(mutex_lock, void *lock, int ret)
359{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500360 return do_mutex_lock_return();
Jiri Olsada7cac72019-12-23 23:08:27 +0100361}
362
363KFUNC_PROBE(mutex_lock, void *lock)
364{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500365 return do_mutex_lock_enter(ctx, 3);
Jiri Olsada7cac72019-12-23 23:08:27 +0100366}
Jiri Olsaac00ac52019-11-15 12:45:59 +0100367
368"""
369
Aaron U'Ren83e0ee72022-02-25 15:09:08 -0600370program_kfunc_nested = """
371KFUNC_PROBE(mutex_unlock, void *lock)
372{
373 return do_mutex_unlock_enter();
374}
375
376KRETFUNC_PROBE(mutex_lock_nested, void *lock, int ret)
377{
378 return do_mutex_lock_return();
379}
380
381KFUNC_PROBE(mutex_lock_nested, void *lock)
382{
383 return do_mutex_lock_enter(ctx, 3);
384}
385
386"""
387
Jiri Olsada7cac72019-12-23 23:08:27 +0100388is_support_kfunc = BPF.support_kfunc()
389if is_support_kfunc:
Aaron U'Ren83e0ee72022-02-25 15:09:08 -0600390 if BPF.get_kprobe_functions(b"mutex_lock_nested"):
391 program += program_kfunc_nested
392 else:
393 program += program_kfunc
Jiri Olsada7cac72019-12-23 23:08:27 +0100394else:
395 program += program_kprobe
396
Jiri Olsaac00ac52019-11-15 12:45:59 +0100397def sort_list(maxs, totals, counts):
398 if (not args.sort):
399 return maxs;
400
401 for field in args.sort.split(','):
402 if (field == "acq_max" or field == "hld_max"):
403 return maxs
404 if (field == "acq_total" or field == "hld_total"):
405 return totals
406 if (field == "acq_count" or field == "hld_count"):
407 return counts
408
409 print("Wrong sort argument: %s", args.sort)
410 exit(-1)
411
412def display(sort, maxs, totals, counts):
413 global missing_stacks
414 global has_enomem
415
416 for k, v in sorted(sort.items(), key=lambda sort: sort[1].value, reverse=True)[:args.locks]:
417 missing_stacks += int(stack_id_err(k.value))
418 has_enomem = has_enomem or (k.value == -errno.ENOMEM)
419
420 caller = "[Missed Kernel Stack]"
421 stack = []
422
423 if (k.value >= 0):
424 stack = list(stack_traces.walk(k.value))
425 caller = b.ksym(stack[1], show_offset=True)
426
Marko Myllynenc71394a2019-12-16 11:35:25 +0200427 if (args.caller and caller.find(args.caller.encode())):
Jiri Olsaac00ac52019-11-15 12:45:59 +0100428 continue
429
430 avg = totals[k].value / counts[k].value
431
432 print("%40s %10lu %6lu %10lu %10lu" % (caller, avg, counts[k].value, maxs[k].value, totals[k].value))
433
Jiri Olsa90b23822019-12-24 01:18:09 +0100434 for addr in stack[2:args.stacks]:
Jiri Olsaac00ac52019-11-15 12:45:59 +0100435 print("%40s" % b.ksym(addr, show_offset=True))
436
437
438if args.tid: # TID trumps PID
439 program = program.replace('FILTER',
440 'if (tid != %s) { return 0; }' % args.tid)
441elif args.pid:
442 program = program.replace('FILTER',
443 'if (pid != %s) { return 0; }' % args.pid)
444else:
445 program = program.replace('FILTER', '')
446
447program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
448
449b = BPF(text=program)
450
Jiri Olsada7cac72019-12-23 23:08:27 +0100451if not is_support_kfunc:
Aaron U'Ren83e0ee72022-02-25 15:09:08 -0600452 b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
453 # Depending on whether DEBUG_LOCK_ALLOC is set, the proper kprobe may be either mutex_lock or mutex_lock_nested
454 if BPF.get_kprobe_functions(b"mutex_lock_nested"):
455 b.attach_kretprobe(event="mutex_lock_nested", fn_name="mutex_lock_return")
456 b.attach_kprobe(event="mutex_lock_nested", fn_name="mutex_lock_enter")
457 else:
458 b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
459 b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
Jiri Olsaac00ac52019-11-15 12:45:59 +0100460
461enabled = b.get_table("enabled");
462
463stack_traces = b.get_table("stack_traces")
464aq_counts = b.get_table("aq_report_count")
465aq_maxs = b.get_table("aq_report_max")
466aq_totals = b.get_table("aq_report_total")
467
468hl_counts = b.get_table("hl_report_count")
469hl_maxs = b.get_table("hl_report_max")
470hl_totals = b.get_table("hl_report_total")
471
472aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
473hl_sort = sort_list(hl_maxs, hl_totals, hl_counts)
474
475print("Tracing lock events... Hit Ctrl-C to end.")
476
477# duration and interval are mutualy exclusive
478exiting = 0 if args.interval else 1
479exiting = 1 if args.duration else 0
480
Jerome Marchand10603c72020-06-11 10:08:26 +0200481seconds = 99999999
Jiri Olsaac00ac52019-11-15 12:45:59 +0100482if args.interval:
483 seconds = args.interval
484if args.duration:
485 seconds = args.duration
486
487missing_stacks = 0
488has_enomem = False
489
490while (1):
491 enabled[ct.c_int(0)] = ct.c_int(1)
492
493 try:
494 sleep(seconds)
495 except KeyboardInterrupt:
496 exiting = 1
497
498 enabled[ct.c_int(0)] = ct.c_int(0)
499
500 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Spin", "Count", "Max spin", "Total spin"))
501 display(aq_sort, aq_maxs, aq_totals, aq_counts)
502
503
504 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Hold", "Count", "Max hold", "Total hold"))
505 display(hl_sort, hl_maxs, hl_totals, hl_counts)
506
507 if exiting:
508 break;
509
510 stack_traces.clear()
511 aq_counts.clear()
512 aq_maxs.clear()
513 aq_totals.clear()
514 hl_counts.clear()
515 hl_maxs.clear()
516 hl_totals.clear()
517
518if missing_stacks > 0:
519 enomem_str = " Consider increasing --stack-storage-size."
520 print("WARNING: %d stack traces lost and could not be displayed.%s" %
521 (missing_stacks, (enomem_str if has_enomem else "")),
522 file=stderr)