blob: 1d12f9e8d1fcd5bbdf3197dc1d4d3c02511d850c [file] [log] [blame]
Brendan Gregg5be51942015-09-16 15:09:04 -07001#!/usr/bin/python
Alexei Starovoitova79da0a2016-01-09 12:06:40 -08002# @lint-avoid-python-3-compatibility-imports
Brendan Gregg5be51942015-09-16 15:09:04 -07003#
Alexei Starovoitova79da0a2016-01-09 12:06:40 -08004# biosnoop Trace block device I/O and print details including issuing PID.
5# For Linux, uses BCC, eBPF.
Brendan Gregg5be51942015-09-16 15:09:04 -07006#
7# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O
8# request, as well as a starting timestamp for calculating I/O latency.
9#
10# Copyright (c) 2015 Brendan Gregg.
11# Licensed under the Apache License, Version 2.0 (the "License")
12#
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080013# 16-Sep-2015 Brendan Gregg Created this.
Brendan Gregg5be51942015-09-16 15:09:04 -070014
15from __future__ import print_function
16from bcc import BPF
17
18# load BPF program
19b = BPF(text="""
20#include <uapi/linux/ptrace.h>
21#include <linux/blkdev.h>
22
23struct val_t {
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080024 char name[TASK_COMM_LEN];
Brendan Gregg5be51942015-09-16 15:09:04 -070025};
26
27BPF_HASH(start, struct request *);
28BPF_HASH(pidbyreq, struct request *, u32);
29BPF_HASH(commbyreq, struct request *, struct val_t);
30
31// cache PID and comm by-req
32int trace_pid_start(struct pt_regs *ctx, struct request *req)
33{
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080034 u32 pid;
35 struct val_t val = {};
Brendan Gregg5be51942015-09-16 15:09:04 -070036
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080037 pid = bpf_get_current_pid_tgid();
38 pidbyreq.update(&req, &pid);
39 if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
40 commbyreq.update(&req, &val);
41 }
Brendan Gregg5be51942015-09-16 15:09:04 -070042
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080043 return 0;
Brendan Gregg5be51942015-09-16 15:09:04 -070044}
45
46// time block I/O
47int trace_req_start(struct pt_regs *ctx, struct request *req)
48{
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080049 u64 ts;
Brendan Gregg5be51942015-09-16 15:09:04 -070050
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080051 ts = bpf_ktime_get_ns();
52 start.update(&req, &ts);
Brendan Gregg5be51942015-09-16 15:09:04 -070053
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080054 return 0;
Brendan Gregg5be51942015-09-16 15:09:04 -070055}
56
57// output
58int trace_req_completion(struct pt_regs *ctx, struct request *req)
59{
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080060 u64 *tsp, delta;
61 u32 *pidp = 0;
62 struct val_t *valp;
Brendan Gregg5be51942015-09-16 15:09:04 -070063
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080064 // fetch timestamp and calculate delta
65 tsp = start.lookup(&req);
66 if (tsp == 0) {
67 // missed tracing issue
68 return 0;
69 }
70 delta = bpf_ktime_get_ns() - *tsp;
Brendan Gregg5be51942015-09-16 15:09:04 -070071
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080072 //
73 // Fetch and output issuing pid and comm.
74 // As bpf_trace_prink() is limited to a maximum of 1 string and 2
75 // integers, we'll use more than one to output the data.
76 //
77 valp = commbyreq.lookup(&req);
78 pidp = pidbyreq.lookup(&req);
79 if (pidp == 0 || valp == 0) {
80 bpf_trace_printk("0 0 ? %d\\n", req->__data_len);
81 } else {
82 bpf_trace_printk("0 %d %s %d\\n", *pidp, valp->name,
83 req->__data_len);
84 }
Brendan Gregg5be51942015-09-16 15:09:04 -070085
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080086 // output remaining details
87 if (req->cmd_flags & REQ_WRITE) {
88 bpf_trace_printk("1 W %s %d %d ?\\n", req->rq_disk->disk_name,
89 req->__sector, delta / 1000);
90 } else {
91 bpf_trace_printk("1 R %s %d %d ?\\n", req->rq_disk->disk_name,
92 req->__sector, delta / 1000);
93 }
Brendan Gregg5be51942015-09-16 15:09:04 -070094
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080095 start.delete(&req);
96 pidbyreq.delete(&req);
97 commbyreq.delete(&req);
Brendan Gregg5be51942015-09-16 15:09:04 -070098
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080099 return 0;
Brendan Gregg5be51942015-09-16 15:09:04 -0700100}
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800101""", debug=0)
Brendan Gregg5be51942015-09-16 15:09:04 -0700102b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
103b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
104b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
105b.attach_kprobe(event="blk_account_io_completion",
106 fn_name="trace_req_completion")
107
108# header
109print("%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s" % ("TIME(s)", "COMM", "PID",
110 "DISK", "T", "SECTOR", "BYTES", "LAT(ms)"))
111
112start_ts = 0
113
114# format output
115while 1:
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800116 (task, pid, cpu, flags, ts, msg) = b.trace_fields()
117 args = msg.split(" ")
Brendan Gregg5be51942015-09-16 15:09:04 -0700118
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800119 if start_ts == 0:
120 start_ts = ts
Brendan Gregg5be51942015-09-16 15:09:04 -0700121
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800122 if args[0] == "0":
123 (real_pid, real_comm, bytes_s) = (args[1], args[2], args[3])
124 continue
125 else:
126 (type_s, disk_s, sector_s, us_s) = (args[1], args[2], args[3],
127 args[4])
Brendan Gregg5be51942015-09-16 15:09:04 -0700128
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800129 ms = float(int(us_s, 10)) / 1000
Brendan Gregg5be51942015-09-16 15:09:04 -0700130
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800131 print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % (
132 ts - start_ts, real_comm, real_pid, disk_s, type_s, sector_s,
133 bytes_s, ms))