blob: 07bff3ee5f389c7f1478543f10b5dcf270349aae [file] [log] [blame]
Jiri Olsaac00ac52019-11-15 12:45:59 +01001#!/usr/bin/python
2#
3# lockstats traces lock events and display locks statistics.
4#
5# USAGE: lockstats
6#
7
8from __future__ import print_function
9from bcc import BPF, USDT
10import argparse
11import subprocess
12import ctypes as ct
13from time import sleep, strftime
14from datetime import datetime, timedelta
15import errno
16from sys import stderr
17
18examples = """
19 lockstats # trace system wide
20 lockstats -d 5 # trace for 5 seconds only
21 lockstats -i 5 # display stats every 5 seconds
22 lockstats -p 123 # trace locks for PID 123
23 lockstats -t 321 # trace locks for PID 321
24 lockstats -c pipe_ # display stats only for lock callers with 'pipe_' substring
25 lockstats -S acq_count # sort lock acquired results on acquired count
26 lockstats -S hld_total # sort lock held results on total held time
27 lockstats -S acq_count,hld_total # combination of above
28 lockstats -n 3 # display 3 locks
29 lockstats -s 3 # display 3 levels of stack
30"""
31
32# arg validation
33def positive_int(val):
34 try:
35 ival = int(val)
36 except ValueError:
37 raise argparse.ArgumentTypeError("must be an integer")
38
39 if ival < 0:
40 raise argparse.ArgumentTypeError("must be positive")
41 return ival
42
43def positive_nonzero_int(val):
44 ival = positive_int(val)
45 if ival == 0:
46 raise argparse.ArgumentTypeError("must be nonzero")
47 return ival
48
49def stack_id_err(stack_id):
50 # -EFAULT in get_stackid normally means the stack-trace is not availible,
51 # Such as getting kernel stack trace in userspace code
52 return (stack_id < 0) and (stack_id != -errno.EFAULT)
53
54parser = argparse.ArgumentParser(
55 description="",
56 formatter_class=argparse.RawDescriptionHelpFormatter,
57 epilog=examples)
58
59time_group = parser.add_mutually_exclusive_group()
60time_group.add_argument("-d", "--duration", type=int,
61 help="total duration of trace in seconds")
62time_group.add_argument("-i", "--interval", type=int,
63 help="print summary at this interval (seconds)")
64parser.add_argument("-n", "--locks", type=int, default=999999999,
65 help="print given number of locks")
66parser.add_argument("-s", "--stacks", type=int, default=1,
67 help="print given number of stack entries")
68parser.add_argument("-c", "--caller",
69 help="print locks taken by given caller")
70parser.add_argument("-S", "--sort",
71 help="sort data on <aq_field,hd_field>, fields: acq_[max|total|count] hld_[max|total|count]")
72parser.add_argument("-p", "--pid",
73 help="trace this PID only")
74parser.add_argument("-t", "--tid",
75 help="trace this TID only")
76parser.add_argument("--stack-storage-size", default=16384,
77 type=positive_nonzero_int,
78 help="the number of unique stack traces that can be stored and "
79 "displayed (default 16384)")
80
81args = parser.parse_args()
82
83program = """
84#include <uapi/linux/ptrace.h>
85
86struct depth_id {
87 u64 id;
88 u64 depth;
89};
90
91BPF_ARRAY(enabled, u64, 1);
92BPF_HASH(track, u64, u64);
93BPF_HASH(time_aq, u64, u64);
94BPF_HASH(lock_depth, u64, u64);
95BPF_HASH(time_held, struct depth_id, u64);
96BPF_HASH(stack, struct depth_id, int);
97
98BPF_HASH(aq_report_count, int, u64);
99BPF_HASH(aq_report_max, int, u64);
100BPF_HASH(aq_report_total, int, u64);
101
102BPF_HASH(hl_report_count, int, u64);
103BPF_HASH(hl_report_max, int, u64);
104BPF_HASH(hl_report_total, int, u64);
105
106BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
107
108static bool is_enabled(void)
109{
110 int key = 0;
111 u64 *ret;
112
113 ret = enabled.lookup(&key);
114 return ret && *ret == 1;
115}
116
117static bool allow_pid(u64 id)
118{
119 u32 pid = id >> 32; // PID is higher part
120 u32 tid = id; // Cast and get the lower part
121
122 FILTER
123
124 return 1;
125}
126
127int mutex_lock_enter(struct pt_regs *ctx)
128{
129 if (!is_enabled())
130 return 0;
131
132 u64 id = bpf_get_current_pid_tgid();
133
134 if (!allow_pid(id))
135 return 0;
136
137 u64 one = 1, zero = 0;
138
139 track.update(&id, &one);
140
141 u64 *depth = lock_depth.lookup(&id);
142
143 if (!depth) {
144 lock_depth.update(&id, &zero);
145
146 depth = lock_depth.lookup(&id);
147 /* something is wrong.. */
148 if (!depth)
149 return 0;
150 }
151
152 int stackid = stack_traces.get_stackid(ctx, 0);
153 struct depth_id did = {
154 .id = id,
155 .depth = *depth,
156 };
157 stack.update(&did, &stackid);
158
159 u64 ts = bpf_ktime_get_ns();
160 time_aq.update(&id, &ts);
161
162 *depth += 1;
163 return 0;
164}
165
166static void update_aq_report_count(int *stackid)
167{
168 u64 *count, one = 1;
169
170 count = aq_report_count.lookup(stackid);
171 if (!count) {
172 aq_report_count.update(stackid, &one);
173 } else {
174 *count += 1;
175 }
176}
177
178static void update_hl_report_count(int *stackid)
179{
180 u64 *count, one = 1;
181
182 count = hl_report_count.lookup(stackid);
183 if (!count) {
184 hl_report_count.update(stackid, &one);
185 } else {
186 *count += 1;
187 }
188}
189
190static void update_aq_report_max(int *stackid, u64 time)
191{
192 u64 *max;
193
194 max = aq_report_max.lookup(stackid);
195 if (!max || *max < time)
196 aq_report_max.update(stackid, &time);
197}
198
199static void update_hl_report_max(int *stackid, u64 time)
200{
201 u64 *max;
202
203 max = hl_report_max.lookup(stackid);
204 if (!max || *max < time)
205 hl_report_max.update(stackid, &time);
206}
207
208static void update_aq_report_total(int *stackid, u64 delta)
209{
210 u64 *count, *time;
211
212 count = aq_report_count.lookup(stackid);
213 if (!count)
214 return;
215
216 time = aq_report_total.lookup(stackid);
217 if (!time) {
218 aq_report_total.update(stackid, &delta);
219 } else {
220 *time = *time + delta;
221 }
222}
223
224static void update_hl_report_total(int *stackid, u64 delta)
225{
226 u64 *count, *time;
227
228 count = hl_report_count.lookup(stackid);
229 if (!count)
230 return;
231
232 time = hl_report_total.lookup(stackid);
233 if (!time) {
234 hl_report_total.update(stackid, &delta);
235 } else {
236 *time = *time + delta;
237 }
238}
239
240int mutex_lock_return(struct pt_regs *ctx)
241{
242 if (!is_enabled())
243 return 0;
244
245 u64 id = bpf_get_current_pid_tgid();
246
247 if (!allow_pid(id))
248 return 0;
249
250 u64 *one = track.lookup(&id);
251
252 if (!one)
253 return 0;
254
255 track.delete(&id);
256
257 u64 *depth = lock_depth.lookup(&id);
258 if (!depth)
259 return 0;
260
261 struct depth_id did = {
262 .id = id,
263 .depth = *depth - 1,
264 };
265
266 u64 *aq = time_aq.lookup(&id);
267 if (!aq)
268 return 0;
269
270 int *stackid = stack.lookup(&did);
271 if (!stackid)
272 return 0;
273
274 u64 cur = bpf_ktime_get_ns();
275
276 if (cur > *aq) {
277 int val = cur - *aq;
278 update_aq_report_count(stackid);
279 update_aq_report_max(stackid, val);
280 update_aq_report_total(stackid, val);
281 }
282
283 time_held.update(&did, &cur);
284 return 0;
285}
286
287int mutex_unlock_enter(struct pt_regs *ctx)
288{
289 if (!is_enabled())
290 return 0;
291
292 u64 id = bpf_get_current_pid_tgid();
293
294 if (!allow_pid(id))
295 return 0;
296
297 u64 *depth = lock_depth.lookup(&id);
298
299 if (!depth || *depth == 0)
300 return 0;
301
302 *depth -= 1;
303
304 struct depth_id did = {
305 .id = id,
306 .depth = *depth,
307 };
308
309 u64 *held = time_held.lookup(&did);
310 if (!held)
311 return 0;
312
313 int *stackid = stack.lookup(&did);
314 if (!stackid)
315 return 0;
316
317 u64 cur = bpf_ktime_get_ns();
318
319 if (cur > *held) {
320 u64 val = cur - *held;
321 update_hl_report_count(stackid);
322 update_hl_report_max(stackid, val);
323 update_hl_report_total(stackid, val);
324 }
325
326 stack.delete(&did);
327 time_held.delete(&did);
328 return 0;
329}
330
331"""
332
333def sort_list(maxs, totals, counts):
334 if (not args.sort):
335 return maxs;
336
337 for field in args.sort.split(','):
338 if (field == "acq_max" or field == "hld_max"):
339 return maxs
340 if (field == "acq_total" or field == "hld_total"):
341 return totals
342 if (field == "acq_count" or field == "hld_count"):
343 return counts
344
345 print("Wrong sort argument: %s", args.sort)
346 exit(-1)
347
348def display(sort, maxs, totals, counts):
349 global missing_stacks
350 global has_enomem
351
352 for k, v in sorted(sort.items(), key=lambda sort: sort[1].value, reverse=True)[:args.locks]:
353 missing_stacks += int(stack_id_err(k.value))
354 has_enomem = has_enomem or (k.value == -errno.ENOMEM)
355
356 caller = "[Missed Kernel Stack]"
357 stack = []
358
359 if (k.value >= 0):
360 stack = list(stack_traces.walk(k.value))
361 caller = b.ksym(stack[1], show_offset=True)
362
363 if (args.caller and caller.find(args.caller)):
364 continue
365
366 avg = totals[k].value / counts[k].value
367
368 print("%40s %10lu %6lu %10lu %10lu" % (caller, avg, counts[k].value, maxs[k].value, totals[k].value))
369
370 for addr in stack[1:args.stacks]:
371 print("%40s" % b.ksym(addr, show_offset=True))
372
373
374if args.tid: # TID trumps PID
375 program = program.replace('FILTER',
376 'if (tid != %s) { return 0; }' % args.tid)
377elif args.pid:
378 program = program.replace('FILTER',
379 'if (pid != %s) { return 0; }' % args.pid)
380else:
381 program = program.replace('FILTER', '')
382
383program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
384
385b = BPF(text=program)
386
387b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
388b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
389b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
390
391enabled = b.get_table("enabled");
392
393stack_traces = b.get_table("stack_traces")
394aq_counts = b.get_table("aq_report_count")
395aq_maxs = b.get_table("aq_report_max")
396aq_totals = b.get_table("aq_report_total")
397
398hl_counts = b.get_table("hl_report_count")
399hl_maxs = b.get_table("hl_report_max")
400hl_totals = b.get_table("hl_report_total")
401
402aq_sort = sort_list(aq_maxs, aq_totals, aq_counts)
403hl_sort = sort_list(hl_maxs, hl_totals, hl_counts)
404
405print("Tracing lock events... Hit Ctrl-C to end.")
406
407# duration and interval are mutualy exclusive
408exiting = 0 if args.interval else 1
409exiting = 1 if args.duration else 0
410
411seconds = 999999999
412if args.interval:
413 seconds = args.interval
414if args.duration:
415 seconds = args.duration
416
417missing_stacks = 0
418has_enomem = False
419
420while (1):
421 enabled[ct.c_int(0)] = ct.c_int(1)
422
423 try:
424 sleep(seconds)
425 except KeyboardInterrupt:
426 exiting = 1
427
428 enabled[ct.c_int(0)] = ct.c_int(0)
429
430 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Spin", "Count", "Max spin", "Total spin"))
431 display(aq_sort, aq_maxs, aq_totals, aq_counts)
432
433
434 print("\n%40s %10s %6s %10s %10s" % ("Caller", "Avg Hold", "Count", "Max hold", "Total hold"))
435 display(hl_sort, hl_maxs, hl_totals, hl_counts)
436
437 if exiting:
438 break;
439
440 stack_traces.clear()
441 aq_counts.clear()
442 aq_maxs.clear()
443 aq_totals.clear()
444 hl_counts.clear()
445 hl_maxs.clear()
446 hl_totals.clear()
447
448if missing_stacks > 0:
449 enomem_str = " Consider increasing --stack-storage-size."
450 print("WARNING: %d stack traces lost and could not be displayed.%s" %
451 (missing_stacks, (enomem_str if has_enomem else "")),
452 file=stderr)