blob: ec6b077ba8c81dfa1107a0988dd4170914d51dd9 [file] [log] [blame]
Erwan Velu8c127942020-05-20 20:25:32 +02001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# dirtop file reads and writes by directory.
5# For Linux, uses BCC, eBPF.
6#
7# USAGE: dirtop.py -d 'directory1,directory2' [-h] [-C] [-r MAXROWS] [interval] [count]
8#
9# This uses in-kernel eBPF maps to store per process summaries for efficiency.
10#
11# Copyright 2016 Netflix, Inc.
12# Licensed under the Apache License, Version 2.0 (the "License")
13#
14# 13-Mar-2020 Erwan Velu Created dirtop from filetop
15# 06-Feb-2016 Brendan Gregg Created filetop.
16
17from __future__ import print_function
18from bcc import BPF
19from time import sleep, strftime
20import argparse
21import os
22import stat
23from subprocess import call
24
# arguments
examples = """examples:
    ./dirtop -d '/hdfs/uuid/*/yarn'       # directory I/O top, 1 second refresh
    ./dirtop -d '/hdfs/uuid/*/yarn' -C    # don't clear the screen
    ./dirtop -d '/hdfs/uuid/*/yarn' 5     # 5 second summaries
    ./dirtop -d '/hdfs/uuid/*/yarn' 5 10  # 5 second summaries, 10 times only
    ./dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data' # Running dirtop on two set of directories
"""
parser = argparse.ArgumentParser(
    description="File reads and writes by process",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
parser.add_argument("-r", "--maxrows", default=20,
    help="maximum rows to print, default 20")
# sort column for the per-interval output table
parser.add_argument("-s", "--sort", default="all",
    choices=["all", "reads", "writes", "rbytes", "wbytes"],
    help="sort column, default all")
# PID filter is applied in-kernel (see TGID_FILTER in the BPF program)
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid",
    help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
# comma-separated glob patterns; resolved to inodes by get_searched_ids()
parser.add_argument("-d", "--root-directories", type=str, required=True, dest="rootdirs",
    help="select the directories to observe, separated by commas")
args = parser.parse_args()
interval = int(args.interval)
countdown = int(args.count)
maxrows = int(args.maxrows)
clear = not int(args.noclear)
# set debug to 1 to dump the generated BPF program before loading it
debug = 0
60
# linux stats
loadavg = "/proc/loadavg"

# define BPF program
# The C source below contains three placeholders that are substituted
# before compilation:
#   TGID_FILTER      - expression excluding unwanted processes (or '0')
#   DIRECTORY_INODES - C array initializer of watched directory inodes
#   INODES_NUMBER    - number of entries in that array
# do_entry() walks a file's dentry chain upwards (bounded to 50 levels
# for the verifier) and records read/write counters keyed by the
# matching watched directory's inode.
bpf_text = """
# include <uapi/linux/ptrace.h>
# include <linux/blkdev.h>

// the key for the output summary
struct info_t {
    unsigned long inode_id;
};

// the value of the output summary
struct val_t {
    u64 reads;
    u64 writes;
    u64 rbytes;
    u64 wbytes;
};

BPF_HASH(counts, struct info_t, struct val_t);

static int do_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count, int is_read)
{
    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    if (TGID_FILTER)
        return 0;

    // The directory inodes we look at
    u32 dir_ids[INODES_NUMBER] = DIRECTORY_INODES;
    struct info_t info = {.inode_id = 0};
    struct dentry *pde = file->f_path.dentry;
    for (int i=0; i<50; i++) {
        // If we don't have any parent, we reached the root
        if (!pde->d_parent) {
            break;
        }
        pde = pde->d_parent;
        // Does the files is part of the directory we look for
        for(int dir_id=0; dir_id<INODES_NUMBER; dir_id++) {
            if (pde->d_inode->i_ino == dir_ids[dir_id]) {
                // Yes, let's export the top directory inode
                info.inode_id = pde->d_inode->i_ino;
                break;
            }
        }
    }
    // If we didn't found any, let's abort
    if (info.inode_id == 0) {
        return 0;
    }

    struct val_t *valp, zero = {};
    valp = counts.lookup_or_try_init(&info, &zero);
    if (valp) {
        if (is_read) {
            valp->reads++;
            valp->rbytes += count;
        } else {
            valp->writes++;
            valp->wbytes += count;
        }
    }
    return 0;
}

int trace_read_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 1);
}

int trace_write_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 0);
}

"""
142
143
def get_searched_ids(root_directories):
    """Resolve the selected directories into their inode numbers.

    root_directories: comma-separated list of glob patterns, e.g.
        "/hdfs/uuid/*/yarn,/hdfs/uuid/*/data".

    Returns a tuple (inodes, inode_to_path):
      * inodes: string of the form '{ino1,ino2,...}', spliced into the
        BPF program as the DIRECTORY_INODES array initializer.
      * inode_to_path: dict mapping each inode number to its directory
        path, used to print human-readable paths in the output.

    Exits the process when no pattern matches any directory.
    """
    from glob import glob
    inode_to_path = {}
    inode_list = []
    total_dirs = 0
    limit_reached = False
    for root_directory in root_directories.split(','):
        try:
            searched_dirs = glob(root_directory, recursive=True)
        except TypeError:
            # Python 2's glob() has no 'recursive' keyword
            searched_dirs = glob(root_directory)

        for mydir in searched_dirs:
            total_dirs = total_dirs + 1
            # If we pass more than 15 dirs, ebpf program fails:
            # stop scanning every remaining pattern, not just this one
            if total_dirs > 15:
                print('15 directories limit reached')
                limit_reached = True
                break
            inode_id = os.lstat(mydir)[stat.ST_INO]
            if inode_id in inode_to_path:
                if inode_to_path[inode_id] == mydir:
                    print('Skipping {} as already considered'.format(mydir))
                # else: same inode reached through a different path
                # (e.g. overlapping globs); keep the first path
            else:
                inode_list.append(str(inode_id))
                inode_to_path[inode_id] = mydir
                print('Considering {} with inode_id {}'.format(mydir, inode_id))
        if limit_reached:
            break

    if not inode_to_path:
        print('Cannot find any valid directory')
        exit()
    # build the C array initializer, e.g. '{123,456}'
    return '{' + ','.join(inode_list) + '}', inode_to_path
178
179
# substitute the in-kernel PID filter: drop events from other processes
# when -p was given, otherwise filter nothing ('0' is always false)
if args.tgid:
    bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else:
    bpf_text = bpf_text.replace('TGID_FILTER', '0')

# resolve -d patterns to inodes and splice them into the C source
inodes, inodes_to_path = get_searched_ids(args.rootdirs)
bpf_text = bpf_text.replace("DIRECTORY_INODES", inodes)
# inode count is derived from the '{a,b,...}' initializer string
bpf_text = bpf_text.replace(
    "INODES_NUMBER", '{}'.format(len(inodes.split(','))))

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry")
b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")
199
DNAME_INLINE_LEN = 32  # linux/dcache.h (inherited from filetop; appears unused here)

print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
203
204
def sort_fn(counts):
    """Sort key for one (info, val) item of the counts table, per --sort."""
    _, val = counts
    if args.sort == "all":
        # "all" ranks by total activity across every column
        return val.rbytes + val.wbytes + val.reads + val.writes
    # otherwise sort by the selected counter attribute
    return getattr(val, args.sort)
211
212
# output
exiting = 0
while 1:
    # wait for the next interval; Ctrl-C prints one last summary then exits
    try:
        sleep(interval)
    except KeyboardInterrupt:
        exiting = 1

    # header
    if clear:
        call("clear")
    else:
        print()
    with open(loadavg) as stats:
        print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    print("%-6s %-6s %-8s %-8s %s" %
        ("READS", "WRITES", "R_Kb", "W_Kb", "PATH"))
    # by-TID output
    counts = b.get_table("counts")
    line = 0
    # aggregate the kernel-side per-key counters per watched directory inode
    reads = {}
    writes = {}
    reads_Kb = {}
    writes_Kb = {}
    for k, v in reversed(sorted(counts.items(),
                                key=sort_fn)):
        # If it's the first time we see this inode
        if k.inode_id not in reads:
            # let's create a new entry
            reads[k.inode_id] = v.reads
            writes[k.inode_id] = v.writes
            reads_Kb[k.inode_id] = v.rbytes / 1024
            writes_Kb[k.inode_id] = v.wbytes / 1024
        else:
            # unless add the current performance metrics
            # to the previous ones
            reads[k.inode_id] += v.reads
            writes[k.inode_id] += v.writes
            reads_Kb[k.inode_id] += v.rbytes / 1024
            writes_Kb[k.inode_id] += v.wbytes / 1024

    # print at most maxrows directories, most active first
    for node_id in reads:
        print("%-6d %-6d %-8d %-8d %s" %
            (reads[node_id], writes[node_id], reads_Kb[node_id], writes_Kb[node_id], inodes_to_path[node_id]))
        line += 1
        if line >= maxrows:
            break

    # reset the in-kernel counters for the next interval
    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()