#!/usr/bin/env bcc-lua
--[[
Copyright 2016 GitHub, Inc

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--]]
17
-- BPF C source handed to BPF:new{text=...}. It defines three probe handlers:
--   trace_pid_start      caches the issuing process id and comm per request,
--   trace_req_start      records the start timestamp per request,
--   trace_req_completion computes the latency and emits a data_t record on
--                        the "events" perf output.
-- The field order of struct data_t must stay in sync with the struct
-- declaration passed to open_perf_buffer() on the Lua side.
local program = [[
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

// Per-request cache of the process that was current when the request was
// accounted.
struct val_t {
    u32 pid;
    char name[TASK_COMM_LEN];
};

// Record sent to user space for every completed request.
struct data_t {
    u32 pid;
    u64 rwflag;     // 1 = write, 0 = anything else
    u64 delta;      // start-to-completion latency, nanoseconds
    u64 sector;
    u64 len;        // request size in bytes (req->__data_len)
    u64 ts;         // completion time, microseconds
    char disk_name[DISK_NAME_LEN];
    char name[TASK_COMM_LEN];
};

BPF_HASH(start, struct request *);
BPF_HASH(infobyreq, struct request *, struct val_t);
BPF_PERF_OUTPUT(events);

// cache PID and comm by-req
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
    struct val_t val = {};

    if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
        // bpf_get_current_pid_tgid() returns (tgid << 32) | pid, where the
        // low half is the kernel thread id. The user-visible process id is
        // the tgid in the upper 32 bits.
        val.pid = bpf_get_current_pid_tgid() >> 32;
        infobyreq.update(&req, &val);
    }
    return 0;
}

// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
    u64 ts;

    ts = bpf_ktime_get_ns();
    start.update(&req, &ts);

    return 0;
}

// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
    u64 *tsp;
    struct val_t *valp;
    struct data_t data = {};
    u64 ts;

    // fetch timestamp and calculate delta
    tsp = start.lookup(&req);
    if (tsp == 0) {
        // missed tracing issue
        return 0;
    }
    ts = bpf_ktime_get_ns();
    data.delta = ts - *tsp;
    data.ts = ts / 1000;

    // the request length is reported whether or not the issuer is known
    data.len = req->__data_len;

    valp = infobyreq.lookup(&req);
    if (valp == 0) {
        // issuer unknown: user space recognises the "?" marker
        strcpy(data.name, "?");
    } else {
        data.pid = valp->pid;
        data.sector = req->__sector;
        bpf_probe_read(&data.name, sizeof(data.name), valp->name);
        bpf_probe_read(&data.disk_name, sizeof(data.disk_name),
                       req->rq_disk->disk_name);
    }

/*
 * The following deals with a kernel version change (in mainline 4.7, although
 * it may be backported to earlier kernels) with how block request write flags
 * are tested. We handle both pre- and post-change versions here. Please avoid
 * kernel version tests like this as much as possible: they inflate the code,
 * test, and maintenance burden.
 */
#ifdef REQ_WRITE
    data.rwflag = !!(req->cmd_flags & REQ_WRITE);
#elif defined(REQ_OP_SHIFT)
    data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
#else
    data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
#endif

    events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&req);
    infobyreq.delete(&req);

    return 0;
}
]]
119
local ffi = require("ffi")

--- Tool entry point: loads the BPF program, attaches the block-layer kprobes
-- and prints one line per event delivered on the "events" perf buffer.
-- @param BPF   BPF class supplied by the caller; used as BPF:new{text=...}
-- @param utils helper table supplied by the caller (not used here)
return function(BPF, utils)
  -- Array sizes substituted for the `$` placeholders in the event struct
  -- below; they have to match the kernel constants used by the C program.
  local TASK_COMM_LEN = 16 -- linux/sched.h
  local DISK_NAME_LEN = 32 -- linux/genhd.h

  local bpf = BPF:new{text=program}

  bpf:attach_kprobe{event="blk_account_io_start", fn_name="trace_pid_start"}
  bpf:attach_kprobe{event="blk_start_request", fn_name="trace_req_start"}
  bpf:attach_kprobe{event="blk_mq_start_request", fn_name="trace_req_start"}
  bpf:attach_kprobe{event="blk_account_io_completion",
    fn_name="trace_req_completion"}

  print("%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s" % {"TIME(s)", "COMM", "PID",
    "DISK", "T", "SECTOR", "BYTES", "LAT(ms)"})

  -- State carried between events for the TIME(s) column.
  local prev_ts = nil  -- timestamp (us) of the previous event, nil before the first
  local elapsed = 0    -- sum of gaps between consecutive event timestamps (us)

  --- Format and print a single completion event.
  -- @param cpu   first callback argument (not used here)
  -- @param event record laid out as the struct passed to open_perf_buffer
  local function print_event(cpu, event)
    local event_pid = event.pid
    local event_delta = tonumber(event.delta)
    local event_sector = tonumber(event.sector)
    local event_len = tonumber(event.len)
    local event_ts = tonumber(event.ts)
    local event_disk_name = ffi.string(event.disk_name)
    local event_name = ffi.string(event.name)

    -- rwflag is produced with `!!` on the BPF side, so it is always 0 or 1.
    -- Derive the letter per event instead of keeping it in shared state.
    local rwflg = "R"
    if tonumber(event.rwflag) == 1 then
      rwflg = "W"
    end

    -- The BPF program sets the name to exactly "?" when it has no cached
    -- issuer; in that case the sector was not filled in, so print -1.
    -- Compare exactly so a real comm that merely contains '?' is unaffected.
    local sector = -1
    if event_name ~= "?" then
      sector = event_sector
    end

    -- The first event prints 0; later events add the gap since the previous one.
    if prev_ts ~= nil then
      elapsed = elapsed + (event_ts - prev_ts)
    end
    prev_ts = event_ts

    -- elapsed is in microseconds (-> seconds), event_delta in nanoseconds (-> ms)
    print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % {
      elapsed / 1000000, event_name, event_pid, event_disk_name, rwflg, sector,
      event_len, event_delta / 1000000})
  end

  -- Field order mirrors struct data_t in the BPF program.
  bpf:get_table("events"):open_perf_buffer(print_event, [[
    struct {
      uint32_t pid;
      uint64_t rwflag;
      uint64_t delta;
      uint64_t sector;
      uint64_t len;
      uint64_t ts;
      char disk_name[$];
      char name[$];
    }
  ]], {DISK_NAME_LEN, TASK_COMM_LEN}, 64)
  bpf:perf_buffer_poll_loop()
end