Alexey Ivanov | cc01a9c | 2019-01-16 09:50:46 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
Brendan Gregg | 75d3e9d | 2016-02-07 18:48:20 -0800 | [diff] [blame] | 4 | # fileslower Trace slow synchronous file reads and writes. |
| 5 | # For Linux, uses BCC, eBPF. |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 6 | # |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 7 | # USAGE: fileslower [-h] [-p PID] [-a] [min_ms] |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 8 | # |
| 9 | # This script uses kernel dynamic tracing of synchronous reads and writes |
Brendan Gregg | 75d3e9d | 2016-02-07 18:48:20 -0800 | [diff] [blame] | 10 | # at the VFS interface, to identify slow file reads and writes for any file |
| 11 | # system. |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 12 | # |
| 13 | # This works by tracing __vfs_read() and __vfs_write(), and filtering for |
| 14 | # synchronous I/O (the path to new_sync_read() and new_sync_write()), and |
| 15 | # for I/O with filenames. This approach provides a view of just two file |
| 16 | # system request types. There are typically many others: asynchronous I/O, |
| 17 | # directory operations, file handle operations, etc, that this tool does not |
Brendan Gregg | 75d3e9d | 2016-02-07 18:48:20 -0800 | [diff] [blame] | 18 | # instrument. |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 19 | # |
| 20 | # WARNING: This traces VFS reads and writes, which can be extremely frequent, |
| 21 | # and so the overhead of this tool can become severe depending on the |
| 22 | # workload. |
| 23 | # |
| 24 | # By default, a minimum millisecond threshold of 10 is used. |
| 25 | # |
| 26 | # Copyright 2016 Netflix, Inc. |
| 27 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 28 | # |
| 29 | # 06-Feb-2016 Brendan Gregg Created this. |
| 30 | |
| 31 | from __future__ import print_function |
| 32 | from bcc import BPF |
| 33 | import argparse |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 34 | import time |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 35 | |
| 36 | # arguments |
| 37 | examples = """examples: |
Brendan Gregg | 75d3e9d | 2016-02-07 18:48:20 -0800 | [diff] [blame] | 38 | ./fileslower # trace sync file I/O slower than 10 ms (default) |
| 39 | ./fileslower 1 # trace sync file I/O slower than 1 ms |
| 40 | ./fileslower -p 185 # trace PID 185 only |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 41 | """ |
| 42 | parser = argparse.ArgumentParser( |
Brendan Gregg | 75d3e9d | 2016-02-07 18:48:20 -0800 | [diff] [blame] | 43 | description="Trace slow synchronous file reads and writes", |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 44 | formatter_class=argparse.RawDescriptionHelpFormatter, |
| 45 | epilog=examples) |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 46 | parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 47 | help="trace this PID only") |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 48 | parser.add_argument("-a", "--all-files", action="store_true", |
| 49 | help="include non-regular file types (sockets, FIFOs, etc)") |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 50 | parser.add_argument("min_ms", nargs="?", default='10', |
| 51 | help="minimum I/O duration to trace, in ms (default 10)") |
Nathan Scott | cf0792f | 2018-02-02 16:56:50 +1100 | [diff] [blame] | 52 | parser.add_argument("--ebpf", action="store_true", |
| 53 | help=argparse.SUPPRESS) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 54 | args = parser.parse_args() |
| 55 | min_ms = int(args.min_ms) |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 56 | tgid = args.tgid |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 57 | debug = 0 |
| 58 | |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 59 | # define BPF program |
| 60 | bpf_text = """ |
| 61 | #include <uapi/linux/ptrace.h> |
| 62 | #include <linux/fs.h> |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 63 | #include <linux/sched.h> |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 64 | |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 65 | enum trace_mode { |
| 66 | MODE_READ, |
| 67 | MODE_WRITE |
| 68 | }; |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 69 | |
| 70 | struct val_t { |
| 71 | u32 sz; |
| 72 | u64 ts; |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 73 | u32 name_len; |
| 74 | // de->d_name.name may point to de->d_iname so limit len accordingly |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 75 | char name[DNAME_INLINE_LEN]; |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 76 | char comm[TASK_COMM_LEN]; |
| 77 | }; |
| 78 | |
| 79 | struct data_t { |
| 80 | enum trace_mode mode; |
| 81 | u32 pid; |
| 82 | u32 sz; |
| 83 | u64 delta_us; |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 84 | u32 name_len; |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 85 | char name[DNAME_INLINE_LEN]; |
| 86 | char comm[TASK_COMM_LEN]; |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 87 | }; |
| 88 | |
| 89 | BPF_HASH(entryinfo, pid_t, struct val_t); |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 90 | BPF_PERF_OUTPUT(events); |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 91 | |
| 92 | // store timestamp and size on entry |
| 93 | static int trace_rw_entry(struct pt_regs *ctx, struct file *file, |
| 94 | char __user *buf, size_t count) |
| 95 | { |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 96 | u32 tgid = bpf_get_current_pid_tgid() >> 32; |
| 97 | if (TGID_FILTER) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 98 | return 0; |
| 99 | |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 100 | u32 pid = bpf_get_current_pid_tgid(); |
| 101 | |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 102 | // skip I/O lacking a filename |
| 103 | struct dentry *de = file->f_path.dentry; |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 104 | int mode = file->f_inode->i_mode; |
| 105 | if (de->d_name.len == 0 || TYPE_FILTER) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 106 | return 0; |
| 107 | |
| 108 | // store size and timestamp by pid |
| 109 | struct val_t val = {}; |
| 110 | val.sz = count; |
| 111 | val.ts = bpf_ktime_get_ns(); |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 112 | |
Paul Chaignon | f86f7e8 | 2018-06-14 02:20:03 +0200 | [diff] [blame] | 113 | struct qstr d_name = de->d_name; |
| 114 | val.name_len = d_name.len; |
Sumanth Korikkar | 7f6066d | 2020-05-20 10:49:56 -0500 | [diff] [blame] | 115 | bpf_probe_read_kernel(&val.name, sizeof(val.name), d_name.name); |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 116 | bpf_get_current_comm(&val.comm, sizeof(val.comm)); |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 117 | entryinfo.update(&pid, &val); |
| 118 | |
| 119 | return 0; |
| 120 | } |
| 121 | |
| 122 | int trace_read_entry(struct pt_regs *ctx, struct file *file, |
| 123 | char __user *buf, size_t count) |
| 124 | { |
| 125 | // skip non-sync I/O; see kernel code for __vfs_read() |
| 126 | if (!(file->f_op->read_iter)) |
| 127 | return 0; |
| 128 | return trace_rw_entry(ctx, file, buf, count); |
| 129 | } |
| 130 | |
| 131 | int trace_write_entry(struct pt_regs *ctx, struct file *file, |
| 132 | char __user *buf, size_t count) |
| 133 | { |
| 134 | // skip non-sync I/O; see kernel code for __vfs_write() |
| 135 | if (!(file->f_op->write_iter)) |
| 136 | return 0; |
| 137 | return trace_rw_entry(ctx, file, buf, count); |
| 138 | } |
| 139 | |
| 140 | // output |
| 141 | static int trace_rw_return(struct pt_regs *ctx, int type) |
| 142 | { |
| 143 | struct val_t *valp; |
| 144 | u32 pid = bpf_get_current_pid_tgid(); |
| 145 | |
| 146 | valp = entryinfo.lookup(&pid); |
| 147 | if (valp == 0) { |
| 148 | // missed tracing issue or filtered |
| 149 | return 0; |
| 150 | } |
| 151 | u64 delta_us = (bpf_ktime_get_ns() - valp->ts) / 1000; |
| 152 | entryinfo.delete(&pid); |
| 153 | if (delta_us < MIN_US) |
| 154 | return 0; |
| 155 | |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 156 | struct data_t data = {}; |
| 157 | data.mode = type; |
| 158 | data.pid = pid; |
| 159 | data.sz = valp->sz; |
| 160 | data.delta_us = delta_us; |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 161 | data.name_len = valp->name_len; |
Sumanth Korikkar | 7f6066d | 2020-05-20 10:49:56 -0500 | [diff] [blame] | 162 | bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name); |
| 163 | bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm); |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 164 | events.perf_submit(ctx, &data, sizeof(data)); |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 165 | |
| 166 | return 0; |
| 167 | } |
| 168 | |
| 169 | int trace_read_return(struct pt_regs *ctx) |
| 170 | { |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 171 | return trace_rw_return(ctx, MODE_READ); |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 172 | } |
| 173 | |
| 174 | int trace_write_return(struct pt_regs *ctx) |
| 175 | { |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 176 | return trace_rw_return(ctx, MODE_WRITE); |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 177 | } |
| 178 | |
| 179 | """ |
| 180 | bpf_text = bpf_text.replace('MIN_US', str(min_ms * 1000)) |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 181 | if args.tgid: |
| 182 | bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % tgid) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 183 | else: |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 184 | bpf_text = bpf_text.replace('TGID_FILTER', '0') |
| 185 | if args.all_files: |
| 186 | bpf_text = bpf_text.replace('TYPE_FILTER', '0') |
| 187 | else: |
| 188 | bpf_text = bpf_text.replace('TYPE_FILTER', '!S_ISREG(mode)') |
| 189 | |
Nathan Scott | cf0792f | 2018-02-02 16:56:50 +1100 | [diff] [blame] | 190 | if debug or args.ebpf: |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 191 | print(bpf_text) |
Nathan Scott | cf0792f | 2018-02-02 16:56:50 +1100 | [diff] [blame] | 192 | if args.ebpf: |
| 193 | exit() |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 194 | |
| 195 | # initialize BPF |
Nathan Scott | cf0792f | 2018-02-02 16:56:50 +1100 | [diff] [blame] | 196 | b = BPF(text=bpf_text) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 197 | |
| 198 | # I'd rather trace these via new_sync_read/new_sync_write (which used to be |
| 199 | # do_sync_read/do_sync_write), but those became static. So trace these from |
| 200 | # the parent functions, at the cost of more overhead, instead. |
| 201 | # Ultimately, we should be using [V]FS tracepoints. |
He Zhe | b61e65f | 2020-08-12 13:04:35 +0800 | [diff] [blame] | 202 | try: |
| 203 | b.attach_kprobe(event="__vfs_read", fn_name="trace_read_entry") |
| 204 | b.attach_kretprobe(event="__vfs_read", fn_name="trace_read_return") |
| 205 | except Exception: |
| 206 | print('Current kernel does not have __vfs_read, try vfs_read instead') |
| 207 | b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry") |
| 208 | b.attach_kretprobe(event="vfs_read", fn_name="trace_read_return") |
Mark Drayton | 32a4fd3 | 2016-07-13 18:24:56 +0100 | [diff] [blame] | 209 | try: |
| 210 | b.attach_kprobe(event="__vfs_write", fn_name="trace_write_entry") |
| 211 | b.attach_kretprobe(event="__vfs_write", fn_name="trace_write_return") |
Teng Qin | aaca976 | 2019-01-11 11:18:45 -0800 | [diff] [blame] | 212 | except Exception: |
He Zhe | b61e65f | 2020-08-12 13:04:35 +0800 | [diff] [blame] | 213 | print('Current kernel does not have __vfs_write, try vfs_write instead') |
Mark Drayton | 32a4fd3 | 2016-07-13 18:24:56 +0100 | [diff] [blame] | 214 | b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry") |
| 215 | b.attach_kretprobe(event="vfs_write", fn_name="trace_write_return") |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 216 | |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 217 | mode_s = { |
| 218 | 0: 'R', |
| 219 | 1: 'W', |
| 220 | } |
| 221 | |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 222 | # header |
| 223 | print("Tracing sync read/writes slower than %d ms" % min_ms) |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 224 | print("%-8s %-14s %-6s %1s %-7s %7s %s" % ("TIME(s)", "COMM", "TID", "D", |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 225 | "BYTES", "LAT(ms)", "FILENAME")) |
| 226 | |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 227 | start_ts = time.time() |
Allan McAleavy | 4f690c2 | 2019-02-27 18:03:42 +0000 | [diff] [blame] | 228 | DNAME_INLINE_LEN = 32 |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 229 | def print_event(cpu, data, size): |
Xiaozhou Liu | 51d62d3 | 2019-02-15 13:03:05 +0800 | [diff] [blame] | 230 | event = b["events"].event(data) |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 231 | |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 232 | ms = float(event.delta_us) / 1000 |
jeromemarchand | b96ebcd | 2018-10-10 01:58:15 +0200 | [diff] [blame] | 233 | name = event.name.decode('utf-8', 'replace') |
Mark Drayton | 7434731 | 2016-08-25 20:46:35 +0100 | [diff] [blame] | 234 | if event.name_len > DNAME_INLINE_LEN: |
| 235 | name = name[:-3] + "..." |
Brendan Gregg | a32fbaf | 2016-02-07 12:14:37 -0800 | [diff] [blame] | 236 | |
| 237 | print("%-8.3f %-14.14s %-6s %1s %-7s %7.2f %s" % ( |
jeromemarchand | b96ebcd | 2018-10-10 01:58:15 +0200 | [diff] [blame] | 238 | time.time() - start_ts, event.comm.decode('utf-8', 'replace'), |
| 239 | event.pid, mode_s[event.mode], event.sz, ms, name)) |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 240 | |
Mark Drayton | 5f5687e | 2017-02-20 18:13:03 +0000 | [diff] [blame] | 241 | b["events"].open_perf_buffer(print_event, page_cnt=64) |
Mark Drayton | 266d6f6 | 2016-05-24 07:01:01 -0700 | [diff] [blame] | 242 | while 1: |
Jerome Marchand | 5167127 | 2018-12-19 01:57:24 +0100 | [diff] [blame] | 243 | try: |
| 244 | b.perf_buffer_poll() |
| 245 | except KeyboardInterrupt: |
| 246 | exit() |