Erwan Velu | 8c12794 | 2020-05-20 20:25:32 +0200 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # dirtop file reads and writes by directory. |
| 5 | # For Linux, uses BCC, eBPF. |
| 6 | # |
| 7 | # USAGE: dirtop.py -d 'directory1,directory2' [-h] [-C] [-r MAXROWS] [interval] [count] |
| 8 | # |
| 9 | # This uses in-kernel eBPF maps to store per process summaries for efficiency. |
| 10 | # |
| 11 | # Copyright 2016 Netflix, Inc. |
| 12 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 13 | # |
| 14 | # 13-Mar-2020 Erwan Velu Created dirtop from filetop |
| 15 | # 06-Feb-2016 Brendan Gregg Created filetop. |
| 16 | |
| 17 | from __future__ import print_function |
| 18 | from bcc import BPF |
| 19 | from time import sleep, strftime |
| 20 | import argparse |
| 21 | import os |
| 22 | import stat |
| 23 | from subprocess import call |
| 24 | |
# arguments
examples = """examples:
    ./dirtop -d '/hdfs/uuid/*/yarn'       # directory I/O top, 1 second refresh
    ./dirtop -d '/hdfs/uuid/*/yarn' -C    # don't clear the screen
    ./dirtop -d '/hdfs/uuid/*/yarn' 5     # 5 second summaries
    ./dirtop -d '/hdfs/uuid/*/yarn' 5 10  # 5 second summaries, 10 times only
    ./dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data' # Running dirtop on two set of directories
"""
parser = argparse.ArgumentParser(
    description="File reads and writes by process",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
parser.add_argument("-r", "--maxrows", default=20,
    help="maximum rows to print, default 20")
parser.add_argument("-s", "--sort", default="all",
    choices=["all", "reads", "writes", "rbytes", "wbytes"],
    help="sort column, default all")
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid",
    help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-d", "--root-directories", type=str, required=True, dest="rootdirs",
    help="select the directories to observe, separated by commas")
args = parser.parse_args()
# Positional args arrive as strings (or int defaults); normalize to int here
# so a bad value fails before any BPF work is done.
interval = int(args.interval)
countdown = int(args.count)
maxrows = int(args.maxrows)
# --noclear is a store_true boolean; no int() round-trip needed.
clear = not args.noclear
debug = 0
| 63 | |
# define BPF program.
# Three placeholders are substituted into this text before loading:
#   TGID_FILTER      - C expression that is non-zero for PIDs to skip
#   DIRECTORY_INODES - C array initializer (e.g. {123,456}) holding the
#                      inode numbers of the watched directories
#   INODES_NUMBER    - number of entries in that array
# do_entry() climbs at most 50 dentry parents of the accessed file; each
# ancestor is compared against the watched inodes, and since the walk
# continues to the root, the match closest to the root is the one kept.
bpf_text = """
# include <uapi/linux/ptrace.h>
# include <linux/blkdev.h>

// the key for the output summary
struct info_t {
    unsigned long inode_id;
};

// the value of the output summary
struct val_t {
    u64 reads;
    u64 writes;
    u64 rbytes;
    u64 wbytes;
};

BPF_HASH(counts, struct info_t, struct val_t);

static int do_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count, int is_read)
{
    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    if (TGID_FILTER)
        return 0;

    // The directory inodes we look at
    u32 dir_ids[INODES_NUMBER] =  DIRECTORY_INODES;
    struct info_t info = {.inode_id = 0};
    struct dentry *pde = file->f_path.dentry;
    for (int i=0; i<50; i++) {
        // If we don't have any parent, we reached the root
        if (!pde->d_parent) {
            break;
        }
        pde = pde->d_parent;
        // Does the files is part of the directory we look for
        for(int dir_id=0; dir_id<INODES_NUMBER; dir_id++) {
            if (pde->d_inode->i_ino == dir_ids[dir_id]) {
                // Yes, let's export the top directory inode
                info.inode_id = pde->d_inode->i_ino;
                break;
            }
        }
    }
    // If we didn't found any, let's abort
    if (info.inode_id == 0) {
        return 0;
    }

    struct val_t *valp, zero = {};
    valp = counts.lookup_or_try_init(&info, &zero);
    if (valp) {
        if (is_read) {
            valp->reads++;
            valp->rbytes += count;
        } else {
            valp->writes++;
            valp->wbytes += count;
        }
    }
    return 0;
}

int trace_read_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 1);
}

int trace_write_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 0);
}

"""
| 142 | |
| 143 | |
def get_searched_ids(root_directories):
    """Resolve the requested directory globs into inode numbers.

    Parameters:
        root_directories: comma-separated glob patterns, e.g.
            '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data'.

    Returns a (inodes, inode_to_path) tuple:
        inodes: a C array initializer string, e.g. '{123,456}', to be
            substituted for DIRECTORY_INODES in the BPF program.
        inode_to_path: dict mapping inode number -> directory path, used
            to print the path column in the output loop.

    Exits the program when no pattern matches any existing directory.
    """
    from glob import glob
    inode_to_path = {}
    inode_ids = []
    total_dirs = 0
    for root_directory in root_directories.split(','):
        try:
            searched_dirs = glob(root_directory, recursive=True)
        except TypeError:
            # Python < 3.5 glob has no 'recursive' keyword
            searched_dirs = glob(root_directory)
        if not searched_dirs:
            continue

        for mydir in searched_dirs:
            total_dirs += 1
            # If we pass more than 15 dirs, ebpf program fails
            if total_dirs > 15:
                print('15 directories limit reached')
                break
            inode_id = os.lstat(mydir)[stat.ST_INO]
            if inode_id in inode_to_path:
                if inode_to_path[inode_id] == mydir:
                    print('Skipping {} as already considered'.format(mydir))
                # NOTE(review): a different path with the same inode
                # (hardlink/bind mount) is silently ignored, as before.
            else:
                inode_ids.append(str(inode_id))
                inode_to_path[inode_id] = mydir
                print('Considering {} with inode_id {}'.format(mydir, inode_id))

    if not inode_to_path:
        print('Cannot find any valid directory')
        exit()
    # Build the C initializer in one pass instead of repeated string
    # concatenation plus a '{,' -> '{' cleanup hack.
    return '{' + ','.join(inode_ids) + '}', inode_to_path
| 178 | |
| 179 | |
# Substitute the runtime configuration into the BPF program text.
if args.tgid:
    # Only trace the requested process (tgid is the user-visible PID)
    bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else:
    # '0' disables the filter: the early-return branch never fires
    bpf_text = bpf_text.replace('TGID_FILTER', '0')

inodes, inodes_to_path = get_searched_ids(args.rootdirs)
bpf_text = bpf_text.replace("DIRECTORY_INODES", inodes)
# INODES_NUMBER must match the number of entries in the initializer
bpf_text = bpf_text.replace(
    "INODES_NUMBER", '{}'.format(len(inodes.split(','))))

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF: count every read/write passing through the VFS layer
b = BPF(text=bpf_text)
b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry")
b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")

# (removed DNAME_INLINE_LEN: a leftover from filetop, unused in dirtop)

print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
| 203 | |
| 204 | |
def sort_fn(counts):
    """Sort key for one (info, val) item of the counts map.

    With --sort all, ranks by the sum of every metric; otherwise by the
    single requested counter attribute.
    """
    _, val = counts
    if args.sort == "all":
        return val.reads + val.writes + val.rbytes + val.wbytes
    return getattr(val, args.sort)
| 211 | |
| 212 | |
# output: print a summary every interval until count runs out or Ctrl-C
exiting = 0
while 1:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        exiting = 1

    # header
    if clear:
        call("clear")
    else:
        print()
    with open(loadavg) as stats:
        print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    print("%-6s %-6s %-8s %-8s %s" %
          ("READS", "WRITES", "R_Kb", "W_Kb", "PATH"))

    # fold the kernel map into per-directory totals; iteration order
    # (and thus print order) follows the sorted map entries
    counts = b.get_table("counts")
    totals = {}  # inode_id -> [reads, writes, read_Kb, written_Kb]
    for k, v in reversed(sorted(counts.items(),
                                key=sort_fn)):
        entry = totals.get(k.inode_id)
        if entry is None:
            totals[k.inode_id] = [v.reads, v.writes,
                                  v.rbytes / 1024, v.wbytes / 1024]
        else:
            entry[0] += v.reads
            entry[1] += v.writes
            entry[2] += v.rbytes / 1024
            entry[3] += v.wbytes / 1024

    line = 0
    for node_id, (nreads, nwrites, read_kb, written_kb) in totals.items():
        print("%-6d %-6d %-8d %-8d %s" %
              (nreads, nwrites, read_kb, written_kb, inodes_to_path[node_id]))
        line += 1
        if line >= maxrows:
            break

    # reset the in-kernel counters for the next interval
    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()