blob: d157b7be1759058ab22122b51a7d850d52bd1095 [file] [log] [blame]
Jiri Olsaac00ac52019-11-15 12:45:59 +01001#!/usr/bin/python
2#
Yonghong Song71f9c2a2019-12-04 19:45:27 -08003# klockstat traces lock events and display locks statistics.
Jiri Olsaac00ac52019-11-15 12:45:59 +01004#
Yonghong Song71f9c2a2019-12-04 19:45:27 -08005# USAGE: klockstat
Jiri Olsaac00ac52019-11-15 12:45:59 +01006#
7
8from __future__ import print_function
9from bcc import BPF, USDT
10import argparse
11import subprocess
12import ctypes as ct
13from time import sleep, strftime
14from datetime import datetime, timedelta
15import errno
16from sys import stderr
17
18examples = """
Yonghong Song71f9c2a2019-12-04 19:45:27 -080019 klockstat # trace system wide
20 klockstat -d 5 # trace for 5 seconds only
21 klockstat -i 5 # display stats every 5 seconds
22 klockstat -p 123 # trace locks for PID 123
23 klockstat -t 321 # trace locks for PID 321
24 klockstat -c pipe_ # display stats only for lock callers with 'pipe_' substring
25 klockstat -S acq_count # sort lock acquired results on acquired count
26 klockstat -S hld_total # sort lock held results on total held time
27 klockstat -S acq_count,hld_total # combination of above
28 klockstat -n 3 # display 3 locks
29 klockstat -s 3 # display 3 levels of stack
Jiri Olsaac00ac52019-11-15 12:45:59 +010030"""
31
32# arg validation
33def positive_int(val):
34 try:
35 ival = int(val)
36 except ValueError:
37 raise argparse.ArgumentTypeError("must be an integer")
38
39 if ival < 0:
40 raise argparse.ArgumentTypeError("must be positive")
41 return ival
42
43def positive_nonzero_int(val):
44 ival = positive_int(val)
45 if ival == 0:
46 raise argparse.ArgumentTypeError("must be nonzero")
47 return ival
48
49def stack_id_err(stack_id):
Michael Prokopc14d02a2020-01-09 02:29:18 +010050 # -EFAULT in get_stackid normally means the stack-trace is not available,
Jiri Olsaac00ac52019-11-15 12:45:59 +010051 # Such as getting kernel stack trace in userspace code
52 return (stack_id < 0) and (stack_id != -errno.EFAULT)
53
54parser = argparse.ArgumentParser(
55 description="",
56 formatter_class=argparse.RawDescriptionHelpFormatter,
57 epilog=examples)
58
59time_group = parser.add_mutually_exclusive_group()
60time_group.add_argument("-d", "--duration", type=int,
61 help="total duration of trace in seconds")
62time_group.add_argument("-i", "--interval", type=int,
63 help="print summary at this interval (seconds)")
Jerome Marchand10603c72020-06-11 10:08:26 +020064parser.add_argument("-n", "--locks", type=int, default=99999999,
Jiri Olsaac00ac52019-11-15 12:45:59 +010065 help="print given number of locks")
66parser.add_argument("-s", "--stacks", type=int, default=1,
67 help="print given number of stack entries")
68parser.add_argument("-c", "--caller",
69 help="print locks taken by given caller")
70parser.add_argument("-S", "--sort",
71 help="sort data on <aq_field,hd_field>, fields: acq_[max|total|count] hld_[max|total|count]")
72parser.add_argument("-p", "--pid",
73 help="trace this PID only")
74parser.add_argument("-t", "--tid",
75 help="trace this TID only")
76parser.add_argument("--stack-storage-size", default=16384,
77 type=positive_nonzero_int,
78 help="the number of unique stack traces that can be stored and "
79 "displayed (default 16384)")
80
81args = parser.parse_args()
82
83program = """
84#include <uapi/linux/ptrace.h>
85
86struct depth_id {
87 u64 id;
88 u64 depth;
89};
90
91BPF_ARRAY(enabled, u64, 1);
92BPF_HASH(track, u64, u64);
93BPF_HASH(time_aq, u64, u64);
94BPF_HASH(lock_depth, u64, u64);
95BPF_HASH(time_held, struct depth_id, u64);
96BPF_HASH(stack, struct depth_id, int);
97
98BPF_HASH(aq_report_count, int, u64);
99BPF_HASH(aq_report_max, int, u64);
100BPF_HASH(aq_report_total, int, u64);
101
102BPF_HASH(hl_report_count, int, u64);
103BPF_HASH(hl_report_max, int, u64);
104BPF_HASH(hl_report_total, int, u64);
105
106BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
107
108static bool is_enabled(void)
109{
110 int key = 0;
111 u64 *ret;
112
113 ret = enabled.lookup(&key);
114 return ret && *ret == 1;
115}
116
117static bool allow_pid(u64 id)
118{
119 u32 pid = id >> 32; // PID is higher part
120 u32 tid = id; // Cast and get the lower part
121
122 FILTER
123
124 return 1;
125}
126
Jiri Olsada7cac72019-12-23 23:08:27 +0100127static int do_mutex_lock_enter(void *ctx, int skip)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100128{
129 if (!is_enabled())
130 return 0;
131
132 u64 id = bpf_get_current_pid_tgid();
133
134 if (!allow_pid(id))
135 return 0;
136
137 u64 one = 1, zero = 0;
138
139 track.update(&id, &one);
140
141 u64 *depth = lock_depth.lookup(&id);
142
143 if (!depth) {
144 lock_depth.update(&id, &zero);
145
146 depth = lock_depth.lookup(&id);
147 /* something is wrong.. */
148 if (!depth)
149 return 0;
150 }
151
Jiri Olsada7cac72019-12-23 23:08:27 +0100152 int stackid = stack_traces.get_stackid(ctx, skip);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100153 struct depth_id did = {
154 .id = id,
155 .depth = *depth,
156 };
157 stack.update(&did, &stackid);
158
159 u64 ts = bpf_ktime_get_ns();
160 time_aq.update(&id, &ts);
161
162 *depth += 1;
163 return 0;
164}
165
166static void update_aq_report_count(int *stackid)
167{
168 u64 *count, one = 1;
169
170 count = aq_report_count.lookup(stackid);
171 if (!count) {
172 aq_report_count.update(stackid, &one);
173 } else {
174 *count += 1;
175 }
176}
177
178static void update_hl_report_count(int *stackid)
179{
180 u64 *count, one = 1;
181
182 count = hl_report_count.lookup(stackid);
183 if (!count) {
184 hl_report_count.update(stackid, &one);
185 } else {
186 *count += 1;
187 }
188}
189
190static void update_aq_report_max(int *stackid, u64 time)
191{
192 u64 *max;
193
194 max = aq_report_max.lookup(stackid);
195 if (!max || *max < time)
196 aq_report_max.update(stackid, &time);
197}
198
199static void update_hl_report_max(int *stackid, u64 time)
200{
201 u64 *max;
202
203 max = hl_report_max.lookup(stackid);
204 if (!max || *max < time)
205 hl_report_max.update(stackid, &time);
206}
207
208static void update_aq_report_total(int *stackid, u64 delta)
209{
210 u64 *count, *time;
211
212 count = aq_report_count.lookup(stackid);
213 if (!count)
214 return;
215
216 time = aq_report_total.lookup(stackid);
217 if (!time) {
218 aq_report_total.update(stackid, &delta);
219 } else {
220 *time = *time + delta;
221 }
222}
223
224static void update_hl_report_total(int *stackid, u64 delta)
225{
226 u64 *count, *time;
227
228 count = hl_report_count.lookup(stackid);
229 if (!count)
230 return;
231
232 time = hl_report_total.lookup(stackid);
233 if (!time) {
234 hl_report_total.update(stackid, &delta);
235 } else {
236 *time = *time + delta;
237 }
238}
239
Jiri Olsada7cac72019-12-23 23:08:27 +0100240static int do_mutex_lock_return(void)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100241{
242 if (!is_enabled())
243 return 0;
244
245 u64 id = bpf_get_current_pid_tgid();
246
247 if (!allow_pid(id))
248 return 0;
249
250 u64 *one = track.lookup(&id);
251
252 if (!one)
253 return 0;
254
255 track.delete(&id);
256
257 u64 *depth = lock_depth.lookup(&id);
258 if (!depth)
259 return 0;
260
261 struct depth_id did = {
262 .id = id,
263 .depth = *depth - 1,
264 };
265
266 u64 *aq = time_aq.lookup(&id);
267 if (!aq)
268 return 0;
269
270 int *stackid = stack.lookup(&did);
271 if (!stackid)
272 return 0;
273
uppinggoerea8bbbf2019-11-29 15:33:44 +0800274 int stackid_ = *stackid;
Jiri Olsaac00ac52019-11-15 12:45:59 +0100275 u64 cur = bpf_ktime_get_ns();
276
277 if (cur > *aq) {
278 int val = cur - *aq;
uppinggoerea8bbbf2019-11-29 15:33:44 +0800279 update_aq_report_count(&stackid_);
280 update_aq_report_max(&stackid_, val);
281 update_aq_report_total(&stackid_, val);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100282 }
283
284 time_held.update(&did, &cur);
285 return 0;
286}
287
Jiri Olsada7cac72019-12-23 23:08:27 +0100288static int do_mutex_unlock_enter(void)
Jiri Olsaac00ac52019-11-15 12:45:59 +0100289{
290 if (!is_enabled())
291 return 0;
292
293 u64 id = bpf_get_current_pid_tgid();
294
295 if (!allow_pid(id))
296 return 0;
297
298 u64 *depth = lock_depth.lookup(&id);
299
300 if (!depth || *depth == 0)
301 return 0;
302
303 *depth -= 1;
304
305 struct depth_id did = {
306 .id = id,
307 .depth = *depth,
308 };
309
310 u64 *held = time_held.lookup(&did);
311 if (!held)
312 return 0;
313
314 int *stackid = stack.lookup(&did);
315 if (!stackid)
316 return 0;
317
uppinggoerea8bbbf2019-11-29 15:33:44 +0800318
319 int stackid_ = *stackid;
Jiri Olsaac00ac52019-11-15 12:45:59 +0100320 u64 cur = bpf_ktime_get_ns();
321
322 if (cur > *held) {
323 u64 val = cur - *held;
uppinggoerea8bbbf2019-11-29 15:33:44 +0800324 update_hl_report_count(&stackid_);
325 update_hl_report_max(&stackid_, val);
326 update_hl_report_total(&stackid_, val);
Jiri Olsaac00ac52019-11-15 12:45:59 +0100327 }
328
329 stack.delete(&did);
330 time_held.delete(&did);
331 return 0;
332}
Jiri Olsada7cac72019-12-23 23:08:27 +0100333"""
334
335program_kprobe = """
336int mutex_unlock_enter(struct pt_regs *ctx)
337{
338 return do_mutex_unlock_enter();
339}
340
341int mutex_lock_return(struct pt_regs *ctx)
342{
343 return do_mutex_lock_return();
344}
345
346int mutex_lock_enter(struct pt_regs *ctx)
347{
348 return do_mutex_lock_enter(ctx, 0);
349}
350"""
351
352program_kfunc = """
353KFUNC_PROBE(mutex_unlock, void *lock)
354{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500355 return do_mutex_unlock_enter();
Jiri Olsada7cac72019-12-23 23:08:27 +0100356}
357
358KRETFUNC_PROBE(mutex_lock, void *lock, int ret)
359{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500360 return do_mutex_lock_return();
Jiri Olsada7cac72019-12-23 23:08:27 +0100361}
362
363KFUNC_PROBE(mutex_lock, void *lock)
364{
Mauricio Vásquez44e0f432020-05-21 11:50:52 -0500365 return do_mutex_lock_enter(ctx, 3);
Jiri Olsada7cac72019-12-23 23:08:27 +0100366}
Jiri Olsaac00ac52019-11-15 12:45:59 +0100367
368"""
369
Jiri Olsada7cac72019-12-23 23:08:27 +0100370is_support_kfunc = BPF.support_kfunc()
371if is_support_kfunc:
372 program += program_kfunc
373else:
374 program += program_kprobe
375
Jiri Olsaac00ac52019-11-15 12:45:59 +0100376def sort_list(maxs, totals, counts):
377 if (not args.sort):
378 return maxs;
379
380 for field in args.sort.split(','):
381 if (field == "acq_max" or field == "hld_max"):
382 return maxs
383 if (field == "acq_total" or field == "hld_total"):
384 return totals
385 if (field == "acq_count" or field == "hld_count"):
386 return counts
387
388 print("Wrong sort argument: %s", args.sort)
389 exit(-1)
390
391def display(sort, maxs, totals, counts):
392 global missing_stacks
393 global has_enomem
394
395 for k, v in sorted(sort.items(), key=lambda sort: sort[1].value, reverse=True)[:args.locks]:
396 missing_stacks += int(stack_id_err(k.value))
397 has_enomem = has_enomem or (k.value == -errno.ENOMEM)
398
399 caller = "[Missed Kernel Stack]"
400 stack = []
401
402 if (k.value >= 0):
403 stack = list(stack_traces.walk(k.value))
404 caller = b.ksym(stack[1], show_offset=True)
405
Marko Myllynenc71394a2019-12-16 11:35:25 +0200406 if (args.caller and caller.find(args.caller.encode())):
Jiri Olsaac00ac52019-11-15 12:45:59 +0100407 continue
408
409 avg = totals[k].value / counts[k].value
410
411 print("%40s %10lu %6lu %10lu %10lu" % (caller, avg, counts[k].value, maxs[k].value, totals[k].value))
412
Jiri Olsa90b23822019-12-24 01:18:09 +0100413 for addr in stack[2:args.stacks]:
Jiri Olsaac00ac52019-11-15 12:45:59 +0100414 print("%40s" % b.ksym(addr, show_offset=True))
415
416
417if args.tid: # TID trumps PID
418 program = program.replace('FILTER',
419 'if (tid != %s) { return 0; }' % args.tid)
420elif args.pid:
421 program = program.replace('FILTER',
422 'if (pid != %s) { return 0; }' % args.pid)
423else:
424 program = program.replace('FILTER', '')
425
426program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
427
428b = BPF(text=program)
429
Jiri Olsada7cac72019-12-23 23:08:27 +0100430if not is_support_kfunc:
431 b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
432 b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
433 b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
Jiri Olsaac00ac52019-11-15 12:45:59 +0100434
435enabled = b.get_table("enabled");
436
437stack_traces = b.get_table("stack_traces")
438aq_counts = b.get_table("aq_report_count")
439aq_maxs = b.get_table("aq_report_max")
440aq_totals = b.get_table("aq_report_total")
441
442hl_counts = b.get_table("hl_report_count")
443hl_maxs = b.get_table("hl_report_max")
444hl_totals = b.get_table("hl_report_total")
445
446aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
447hl_sort = sort_list(hl_maxs, hl_totals, hl_counts)
448
449print("Tracing lock events... Hit Ctrl-C to end.")
450
451# duration and interval are mutualy exclusive
452exiting = 0 if args.interval else 1
453exiting = 1 if args.duration else 0
454
Jerome Marchand10603c72020-06-11 10:08:26 +0200455seconds = 99999999
Jiri Olsaac00ac52019-11-15 12:45:59 +0100456if args.interval:
457 seconds = args.interval
458if args.duration:
459 seconds = args.duration
460
461missing_stacks = 0
462has_enomem = False
463
464while (1):
465 enabled[ct.c_int(0)] = ct.c_int(1)
466
467 try:
468 sleep(seconds)
469 except KeyboardInterrupt:
470 exiting = 1
471
472 enabled[ct.c_int(0)] = ct.c_int(0)
473
474 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Spin", "Count", "Max spin", "Total spin"))
475 display(aq_sort, aq_maxs, aq_totals, aq_counts)
476
477
478 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Hold", "Count", "Max hold", "Total hold"))
479 display(hl_sort, hl_maxs, hl_totals, hl_counts)
480
481 if exiting:
482 break;
483
484 stack_traces.clear()
485 aq_counts.clear()
486 aq_maxs.clear()
487 aq_totals.clear()
488 hl_counts.clear()
489 hl_maxs.clear()
490 hl_totals.clear()
491
492if missing_stacks > 0:
493 enomem_str = " Consider increasing --stack-storage-size."
494 print("WARNING: %d stack traces lost and could not be displayed.%s" %
495 (missing_stacks, (enomem_str if has_enomem else "")),
496 file=stderr)