blob: 63a2a57274eedc091d6ce2fdca9245762a932d9d [file] [log] [blame]
Alexey Ivanovcc01a9c2019-01-16 09:50:46 -08001#!/usr/bin/python
Alexei Starovoitova79da0a2016-01-09 12:06:40 -08002# @lint-avoid-python-3-compatibility-imports
Brendan Gregg9fa15622015-09-21 15:51:11 -07003#
Alexei Starovoitova79da0a2016-01-09 12:06:40 -08004# biolatency Summarize block device I/O latency as a histogram.
5# For Linux, uses BCC, eBPF.
Brendan Gregg9fa15622015-09-21 15:51:11 -07006#
xingfeng25103d07ee82022-03-10 09:38:16 +08007# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [-F] [-e] [-j] [interval] [count]
Brendan Gregg9fa15622015-09-21 15:51:11 -07008#
9# Copyright (c) 2015 Brendan Gregg.
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080012# 20-Sep-2015 Brendan Gregg Created this.
Brendan Gregg9fa15622015-09-21 15:51:11 -070013
14from __future__ import print_function
15from bcc import BPF
16from time import sleep, strftime
17import argparse
Omkar Desaiaec53a82020-12-31 11:30:31 -050018import ctypes as ct
Brendan Gregg9fa15622015-09-21 15:51:11 -070019
20# arguments
21examples = """examples:
Omkar Desaiaec53a82020-12-31 11:30:31 -050022 ./biolatency # summarize block I/O latency as a histogram
23 ./biolatency 1 10 # print 1 second summaries, 10 times
24 ./biolatency -mT 1 # 1s summaries, milliseconds, and timestamps
25 ./biolatency -Q # include OS queued time in I/O time
26 ./biolatency -D # show each disk device separately
27 ./biolatency -F # show I/O flags separately
28 ./biolatency -j # print a dictionary
edwardwuf41f3782021-04-28 18:22:48 +080029 ./biolatency -e # show extension summary(total, average)
Brendan Gregg9fa15622015-09-21 15:51:11 -070030"""
31parser = argparse.ArgumentParser(
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080032 description="Summarize block device I/O latency as a histogram",
33 formatter_class=argparse.RawDescriptionHelpFormatter,
Nathan Scott1a197db2018-01-21 09:14:27 +110034 epilog=examples)
Brendan Gregg9fa15622015-09-21 15:51:11 -070035parser.add_argument("-T", "--timestamp", action="store_true",
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080036 help="include timestamp on output")
Brendan Gregg9fa15622015-09-21 15:51:11 -070037parser.add_argument("-Q", "--queued", action="store_true",
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080038 help="include OS queued time in I/O time")
Brendan Gregg9fa15622015-09-21 15:51:11 -070039parser.add_argument("-m", "--milliseconds", action="store_true",
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080040 help="millisecond histogram")
Brendan Gregg44e68772015-09-25 13:20:16 -070041parser.add_argument("-D", "--disks", action="store_true",
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080042 help="print a histogram per disk device")
Brendan Greggc6cded42019-03-20 09:32:20 -070043parser.add_argument("-F", "--flags", action="store_true",
44 help="print a histogram per set of I/O flags")
edwardwuf41f3782021-04-28 18:22:48 +080045parser.add_argument("-e", "--extension", action="store_true",
46 help="summarize average/total value")
Brendan Gregg9fa15622015-09-21 15:51:11 -070047parser.add_argument("interval", nargs="?", default=99999999,
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080048 help="output interval, in seconds")
Brendan Gregg9fa15622015-09-21 15:51:11 -070049parser.add_argument("count", nargs="?", default=99999999,
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080050 help="number of outputs")
Nathan Scottf5fb9af2018-01-17 09:39:59 +110051parser.add_argument("--ebpf", action="store_true",
52 help=argparse.SUPPRESS)
Omkar Desaiaec53a82020-12-31 11:30:31 -050053parser.add_argument("-j", "--json", action="store_true",
54 help="json output")
55
Brendan Gregg9fa15622015-09-21 15:51:11 -070056args = parser.parse_args()
57countdown = int(args.count)
58debug = 0
Omkar Desaiaec53a82020-12-31 11:30:31 -050059
Brendan Greggc6cded42019-03-20 09:32:20 -070060if args.flags and args.disks:
61 print("ERROR: can only use -D or -F. Exiting.")
62 exit()
Brendan Gregg9fa15622015-09-21 15:51:11 -070063
Brendan Gregg44e68772015-09-25 13:20:16 -070064# define BPF program
Brendan Gregg9fa15622015-09-21 15:51:11 -070065bpf_text = """
66#include <uapi/linux/ptrace.h>
Jerome Marchandee810722022-02-23 16:04:30 +010067#include <linux/blk-mq.h>
Brendan Gregg9fa15622015-09-21 15:51:11 -070068
Brendan Gregg44e68772015-09-25 13:20:16 -070069typedef struct disk_key {
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080070 char disk[DISK_NAME_LEN];
71 u64 slot;
Brendan Gregg44e68772015-09-25 13:20:16 -070072} disk_key_t;
Brendan Greggc6cded42019-03-20 09:32:20 -070073
74typedef struct flag_key {
75 u64 flags;
76 u64 slot;
77} flag_key_t;
78
edwardwuf41f3782021-04-28 18:22:48 +080079typedef struct ext_val {
80 u64 total;
81 u64 count;
82} ext_val_t;
83
Brendan Gregg9fa15622015-09-21 15:51:11 -070084BPF_HASH(start, struct request *);
Brendan Gregg44e68772015-09-25 13:20:16 -070085STORAGE
Brendan Gregg9fa15622015-09-21 15:51:11 -070086
87// time block I/O
88int trace_req_start(struct pt_regs *ctx, struct request *req)
89{
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080090 u64 ts = bpf_ktime_get_ns();
91 start.update(&req, &ts);
92 return 0;
Brendan Gregg9fa15622015-09-21 15:51:11 -070093}
94
95// output
gurugioc5a448a2019-01-04 17:54:39 +010096int trace_req_done(struct pt_regs *ctx, struct request *req)
Brendan Gregg9fa15622015-09-21 15:51:11 -070097{
Alexei Starovoitova79da0a2016-01-09 12:06:40 -080098 u64 *tsp, delta;
Brendan Gregg9fa15622015-09-21 15:51:11 -070099
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800100 // fetch timestamp and calculate delta
101 tsp = start.lookup(&req);
102 if (tsp == 0) {
103 return 0; // missed issue
104 }
105 delta = bpf_ktime_get_ns() - *tsp;
edwardwuf41f3782021-04-28 18:22:48 +0800106
107 EXTENSION
108
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800109 FACTOR
Brendan Gregg9fa15622015-09-21 15:51:11 -0700110
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800111 // store as histogram
112 STORE
Brendan Gregg9fa15622015-09-21 15:51:11 -0700113
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800114 start.delete(&req);
115 return 0;
Brendan Gregg9fa15622015-09-21 15:51:11 -0700116}
117"""
Brendan Gregg44e68772015-09-25 13:20:16 -0700118
119# code substitutions
Brendan Gregg9fa15622015-09-21 15:51:11 -0700120if args.milliseconds:
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800121 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
122 label = "msecs"
Brendan Gregg9fa15622015-09-21 15:51:11 -0700123else:
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800124 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
125 label = "usecs"
edwardwuf41f3782021-04-28 18:22:48 +0800126
127storage_str = ""
128store_str = ""
Brendan Gregg44e68772015-09-25 13:20:16 -0700129if args.disks:
edwardwuf41f3782021-04-28 18:22:48 +0800130 storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
Tejun Heoddfcc292022-03-10 08:37:21 -1000131 disks_str = """
edwardwuf41f3782021-04-28 18:22:48 +0800132 disk_key_t key = {.slot = bpf_log2l(delta)};
Tejun Heoddfcc292022-03-10 08:37:21 -1000133 void *__tmp = (void *)req->__RQ_DISK__->disk_name;
edwardwuf41f3782021-04-28 18:22:48 +0800134 bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
zcy80242fb2021-07-02 00:12:32 +0800135 dist.atomic_increment(key);
edwardwuf41f3782021-04-28 18:22:48 +0800136 """
Tejun Heoddfcc292022-03-10 08:37:21 -1000137 if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
138 store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
139 else:
140 store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
Brendan Greggc6cded42019-03-20 09:32:20 -0700141elif args.flags:
edwardwuf41f3782021-04-28 18:22:48 +0800142 storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
143 store_str += """
144 flag_key_t key = {.slot = bpf_log2l(delta)};
145 key.flags = req->cmd_flags;
zcy80242fb2021-07-02 00:12:32 +0800146 dist.atomic_increment(key);
edwardwuf41f3782021-04-28 18:22:48 +0800147 """
Brendan Gregg44e68772015-09-25 13:20:16 -0700148else:
edwardwuf41f3782021-04-28 18:22:48 +0800149 storage_str += "BPF_HISTOGRAM(dist);"
zcy80242fb2021-07-02 00:12:32 +0800150 store_str += "dist.atomic_increment(bpf_log2l(delta));"
edwardwuf41f3782021-04-28 18:22:48 +0800151
152if args.extension:
153 storage_str += "BPF_ARRAY(extension, ext_val_t, 1);"
154 bpf_text = bpf_text.replace('EXTENSION', """
155 u32 index = 0;
156 ext_val_t *ext_val = extension.lookup(&index);
157 if (ext_val) {
158 lock_xadd(&ext_val->total, delta);
159 lock_xadd(&ext_val->count, 1);
160 }
161 """)
162else:
163 bpf_text = bpf_text.replace('EXTENSION', '')
164bpf_text = bpf_text.replace("STORAGE", storage_str)
165bpf_text = bpf_text.replace("STORE", store_str)
166
Nathan Scottca4ba552018-01-16 11:02:58 +1100167if debug or args.ebpf:
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800168 print(bpf_text)
Nathan Scottca4ba552018-01-16 11:02:58 +1100169 if args.ebpf:
170 exit()
Brendan Gregg9fa15622015-09-21 15:51:11 -0700171
172# load BPF program
173b = BPF(text=bpf_text)
174if args.queued:
Hengqi Chen8c80b292021-12-11 17:36:17 +0800175 if BPF.get_kprobe_functions(b'__blk_account_io_start'):
176 b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
177 else:
178 b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
Brendan Gregg9fa15622015-09-21 15:51:11 -0700179else:
Jerome Marchand74e25ed2018-12-10 08:54:50 +0100180 if BPF.get_kprobe_functions(b'blk_start_request'):
181 b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800182 b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
Hengqi Chen8c80b292021-12-11 17:36:17 +0800183if BPF.get_kprobe_functions(b'__blk_account_io_done'):
184 b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
185else:
186 b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
Brendan Gregg9fa15622015-09-21 15:51:11 -0700187
Omkar Desaiaec53a82020-12-31 11:30:31 -0500188if not args.json:
189 print("Tracing block device I/O... Hit Ctrl-C to end.")
Brendan Gregg9fa15622015-09-21 15:51:11 -0700190
xingfeng25103d07ee82022-03-10 09:38:16 +0800191def disk_print(s):
192 disk = s.decode('utf-8', 'replace')
193 if not disk:
194 disk = "<unknown>"
195 return disk
196
Brendan Greggc6cded42019-03-20 09:32:20 -0700197# see blk_fill_rwbs():
198req_opf = {
199 0: "Read",
200 1: "Write",
201 2: "Flush",
202 3: "Discard",
203 5: "SecureErase",
204 6: "ZoneReset",
205 7: "WriteSame",
206 9: "WriteZeros"
207}
208REQ_OP_BITS = 8
209REQ_OP_MASK = ((1 << REQ_OP_BITS) - 1)
210REQ_SYNC = 1 << (REQ_OP_BITS + 3)
211REQ_META = 1 << (REQ_OP_BITS + 4)
212REQ_PRIO = 1 << (REQ_OP_BITS + 5)
213REQ_NOMERGE = 1 << (REQ_OP_BITS + 6)
214REQ_IDLE = 1 << (REQ_OP_BITS + 7)
215REQ_FUA = 1 << (REQ_OP_BITS + 9)
216REQ_RAHEAD = 1 << (REQ_OP_BITS + 11)
217REQ_BACKGROUND = 1 << (REQ_OP_BITS + 12)
218REQ_NOWAIT = 1 << (REQ_OP_BITS + 13)
219def flags_print(flags):
220 desc = ""
221 # operation
222 if flags & REQ_OP_MASK in req_opf:
223 desc = req_opf[flags & REQ_OP_MASK]
224 else:
225 desc = "Unknown"
226 # flags
227 if flags & REQ_SYNC:
228 desc = "Sync-" + desc
229 if flags & REQ_META:
230 desc = "Metadata-" + desc
231 if flags & REQ_FUA:
232 desc = "ForcedUnitAccess-" + desc
233 if flags & REQ_PRIO:
234 desc = "Priority-" + desc
235 if flags & REQ_NOMERGE:
236 desc = "NoMerge-" + desc
237 if flags & REQ_IDLE:
238 desc = "Idle-" + desc
239 if flags & REQ_RAHEAD:
240 desc = "ReadAhead-" + desc
241 if flags & REQ_BACKGROUND:
242 desc = "Background-" + desc
243 if flags & REQ_NOWAIT:
244 desc = "NoWait-" + desc
245 return desc
246
Yonghong Song17e14ef2020-10-01 13:34:43 -0700247# output
248exiting = 0 if args.interval else 1
249dist = b.get_table("dist")
edwardwuf41f3782021-04-28 18:22:48 +0800250if args.extension:
251 extension = b.get_table("extension")
Yonghong Song17e14ef2020-10-01 13:34:43 -0700252while (1):
253 try:
254 sleep(int(args.interval))
255 except KeyboardInterrupt:
256 exiting = 1
257
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800258 print()
Omkar Desaiaec53a82020-12-31 11:30:31 -0500259 if args.json:
260 if args.timestamp:
261 print("%-8s\n" % strftime("%H:%M:%S"), end="")
Brendan Gregg9fa15622015-09-21 15:51:11 -0700262
Omkar Desaiaec53a82020-12-31 11:30:31 -0500263 if args.flags:
Omkar Desaibb6e9312021-01-04 15:16:39 -0500264 dist.print_json_hist(label, "flags", flags_print)
Omkar Desaiaec53a82020-12-31 11:30:31 -0500265 else:
xingfeng25103d07ee82022-03-10 09:38:16 +0800266 dist.print_json_hist(label, "disk", disk_print)
Omkar Desaiaec53a82020-12-31 11:30:31 -0500267
Brendan Greggc6cded42019-03-20 09:32:20 -0700268 else:
Omkar Desaiaec53a82020-12-31 11:30:31 -0500269 if args.timestamp:
270 print("%-8s\n" % strftime("%H:%M:%S"), end="")
zcy80242fb2021-07-02 00:12:32 +0800271
Omkar Desaiaec53a82020-12-31 11:30:31 -0500272 if args.flags:
273 dist.print_log2_hist(label, "flags", flags_print)
Omkar Desaiaec53a82020-12-31 11:30:31 -0500274 else:
xingfeng25103d07ee82022-03-10 09:38:16 +0800275 dist.print_log2_hist(label, "disk", disk_print)
edwardwuf41f3782021-04-28 18:22:48 +0800276 if args.extension:
277 total = extension[0].total
278 counts = extension[0].count
279 if counts > 0:
280 if label == 'msecs':
281 total /= 1000000
282 elif label == 'usecs':
283 total /= 1000
284 avg = total / counts
285 print("\navg = %ld %s, total: %ld %s, count: %ld\n" %
286 (total / counts, label, total, label, counts))
287 extension.clear()
Omkar Desaiaec53a82020-12-31 11:30:31 -0500288
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800289 dist.clear()
Brendan Gregg9fa15622015-09-21 15:51:11 -0700290
Alexei Starovoitova79da0a2016-01-09 12:06:40 -0800291 countdown -= 1
292 if exiting or countdown == 0:
Yonghong Song17e14ef2020-10-01 13:34:43 -0700293 exit()