blob: 51ecd09febe09e1b181f521246f8a51ca4865d35 [file] [log] [blame]
Brendan Gregg6f075b92016-02-07 00:46:34 -08001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# biotop block device (disk) I/O by process.
5# For Linux, uses BCC, eBPF.
6#
7# USAGE: biotop.py [-h] [-C] [-r MAXROWS] [interval] [count]
8#
9# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O
10# request, as well as a starting timestamp for calculating I/O latency.
11#
12# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 06-Feb-2016 Brendan Gregg Created this.
16
17from __future__ import print_function
18from bcc import BPF
19from time import sleep, strftime
20import argparse
21import signal
22from subprocess import call
23
# arguments
# Epilog text shown verbatim by --help (RawDescriptionHelpFormatter keeps the
# line breaks as written).
examples = """examples:
 ./biotop # block device I/O top, 1 second refresh
 ./biotop -C # don't clear the screen
 ./biotop 5 # 5 second summaries
 ./biotop 5 10 # 5 second summaries, 10 times only
"""
parser = argparse.ArgumentParser(
    description="Block device (disk) I/O by process",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int lets argparse reject non-numeric values with a normal usage error
# instead of a ValueError traceback escaping from a later int() conversion.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
args = parser.parse_args()

# Module-level settings consumed by the output loop.
interval = args.interval    # seconds slept between summaries
countdown = args.count      # summaries left to print
maxrows = args.maxrows      # row limit per summary
clear = not args.noclear    # clear the screen before each summary
48
# linux stats: procfs files this script opens for reading
loadavg, diskstats = "/proc/loadavg", "/proc/diskstats"
52
# signal handler
def signal_ignore(signal, frame):
    """Handler that does nothing except emit an empty line on stdout.

    Both arguments are accepted and ignored.
    """
    print("")
56
# load BPF program
#
# Three probes cooperate, all keyed by the struct request pointer:
#   trace_pid_start      - remembers which process (PID, comm) issued the I/O
#   trace_req_start      - remembers when the request was started
#   trace_req_completion - folds latency, byte count and I/O count into the
#                          "counts" map, keyed by (pid, comm, R/W, major, minor)
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

// for saving process info by request
struct who_t {
    u32 pid;
    char name[TASK_COMM_LEN];
};

// the key for the output summary
struct info_t {
    u32 pid;
    int type;
    int major;
    int minor;
    char name[TASK_COMM_LEN];
};

// the value of the output summary
struct val_t {
    u64 bytes;
    u64 us;
    u32 io;
};

BPF_HASH(start, struct request *);
BPF_HASH(whobyreq, struct request *, struct who_t);
BPF_HASH(counts, struct info_t, struct val_t);

// cache PID and comm by-req
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
    struct who_t who = {};

    if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
        // bpf_get_current_pid_tgid() packs the process ID (tgid) in the
        // upper 32 bits and the thread ID in the lower 32 bits. Storing the
        // raw value in a u32 would keep the thread ID; shift to keep the PID.
        who.pid = bpf_get_current_pid_tgid() >> 32;
        whobyreq.update(&req, &who);
    }

    return 0;
}

// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
    u64 ts;

    ts = bpf_ktime_get_ns();
    start.update(&req, &ts);

    return 0;
}

// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
    u64 *tsp;

    // fetch timestamp and calculate delta
    tsp = start.lookup(&req);
    if (tsp == 0) {
        return 0;    // missed tracing issue
    }

    struct who_t *whop;
    struct val_t *valp, zero = {};
    u64 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;

    // setup info_t key
    struct info_t info = {};
    info.major = req->rq_disk->major;
    info.minor = req->rq_disk->first_minor;
    info.type = req->cmd_flags & REQ_WRITE;
    whop = whobyreq.lookup(&req);
    if (whop == 0) {
        // missed pid who, save stats as pid 0
        valp = counts.lookup_or_init(&info, &zero);
    } else {
        info.pid = whop->pid;
        __builtin_memcpy(&info.name, whop->name, sizeof(info.name));
        valp = counts.lookup_or_init(&info, &zero);
    }

    // save stats
    valp->us += delta_us;
    valp->bytes += req->__data_len;
    valp->io++;

    start.delete(&req);
    whobyreq.delete(&req);

    return 0;
}
""", debug=0)
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
# the same timestamp probe is attached to both request-start functions
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_account_io_completion",
    fn_name="trace_req_completion")
158
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)

# cache disk major,minor -> diskname
# Each diskstats line is split on whitespace; the first two fields form the
# "major,minor" key and the third field is stored as the disk name.
disklookup = {}
with open(diskstats) as stats:
    for fields in (entry.split() for entry in stats):
        devname = fields[2]
        disklookup["%s,%s" % (fields[0], fields[1])] = devname
167
# output
# Every `interval` seconds: print a header (time + load averages), then the
# per-process I/O rows sorted by bytes (largest first, at most `maxrows`),
# then empty the "counts" map so the next summary starts from zero.
exiting = 0
while 1:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        # Ctrl-C: fall through so one last summary is printed, then exit below
        exiting = 1

    # header
    if clear:
        call("clear")
    else:
        print()
    with open(loadavg) as stats:
        print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))
    print("%-6s %-16s %1s %-3s %-3s %-8s %5s %7s %6s" % ("PID", "COMM",
        "D", "MAJ", "MIN", "DISK", "I/O", "Kbytes", "AVGms"))

    # by-PID output
    counts = b.get_table("counts")
    rows_printed = 0
    for k, v in reversed(sorted(counts.items(),
                                key=lambda item: item[1].bytes)):

        # lookup disk
        diskname = disklookup.get(str(k.major) + "," + str(k.minor), "?")

        # On Python 3 the comm field arrives as bytes and would be rendered
        # as b'...'; decode it so the COMM column shows the plain name.
        comm = k.name
        if not isinstance(comm, str):
            comm = comm.decode("utf-8", "replace")

        # print line
        avg_ms = (float(v.us) / 1000) / v.io
        # Floor division keeps Kbytes an integer on both Python 2 and 3
        # (true division on Python 3 would print a float such as "4.0").
        print("%-6d %-16s %1s %-3d %-3d %-8s %5s %7s %6.2f" % (k.pid, comm,
            "W" if k.type else "R", k.major, k.minor, diskname, v.io,
            v.bytes // 1024, avg_ms))

        rows_printed += 1
        if rows_printed >= maxrows:
            break
    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()