Blame - block/blktrace.c - kernel/msm-4.19

blob: 265f7a830619ef1e5de60bfe1179fa9d3a002229 [file] [log] [blame]

Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	1	/*
				2	* Copyright (C) 2006 Jens Axboe <axboe@suse.de>
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				16	*
				17	*/
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	18	#include <linux/kernel.h>
				19	#include <linux/blkdev.h>
				20	#include <linux/blktrace_api.h>
				21	#include <linux/percpu.h>
				22	#include <linux/init.h>
				23	#include <linux/mutex.h>
				24	#include <linux/debugfs.h>
				25	#include <asm/uaccess.h>
				26
				27	static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
				28	static unsigned int blktrace_seq __read_mostly = 1;
				29
				30	/*
				31	* Send out a notify for this process, if we haven't done so since a trace
				32	* started
				33	*/
				34	static void trace_note_tsk(struct blk_trace bt, struct task_struct tsk)
				35	{
				36	struct blk_io_trace *t;
				37
				38	t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
				39	if (t) {
				40	t->magic = BLK_IO_TRACE_MAGIC \| BLK_IO_TRACE_VERSION;
				41	t->device = bt->dev;
				42	t->action = BLK_TC_ACT(BLK_TC_NOTIFY);
				43	t->pid = tsk->pid;
				44	t->cpu = smp_processor_id();
				45	t->pdu_len = sizeof(tsk->comm);
				46	memcpy((void ) t + sizeof(t), tsk->comm, t->pdu_len);
				47	tsk->btrace_seq = blktrace_seq;
				48	}
				49	}
				50
				51	static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
				52	pid_t pid)
				53	{
				54	if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
				55	return 1;
				56	if (sector < bt->start_lba \|\| sector > bt->end_lba)
				57	return 1;
				58	if (bt->pid && pid != bt->pid)
				59	return 1;
				60
				61	return 0;
				62	}
				63
				64	/*
				65	* Data direction bit lookup
				66	*/
				67	static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
				68
				69	/*
				70	* Bio action bits of interest
				71	*/
Nathan Scott	40359cc	2006-07-06 10:03:28 +0200	[diff] [blame]	72	static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) };
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	73
				/*
				 * Each macro isolates one flag of @rw and moves it to its own bit
				 * position (bit 0, 1, 2 respectively), so the result is either 0 or the
				 * matching bio_act[] index (1, 2 or 4). More could be added as needed;
				 * every new macro must target the next free bit position so that the
				 * table indexing stays correct.
				 */
				#define trace_barrier_bit(rw)	\
					(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))

				#define trace_sync_bit(rw)	\
					(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))

				/* Shifted left rather than right: assumes BIO_RW_AHEAD is at most 2. */
				#define trace_ahead_bit(rw)	\
					(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	84
				85	/*
				86	* The worker for the various blk_add_trace*() types. Fills out a
				87	* blk_io_trace structure and places it in a per-cpu subbuffer.
				88	*/
				89	void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
				90	int rw, u32 what, int error, int pdu_len, void *pdu_data)
				91	{
				92	struct task_struct *tsk = current;
				93	struct blk_io_trace *t;
				94	unsigned long flags;
				95	unsigned long *sequence;
				96	pid_t pid;
				97	int cpu;
				98
				99	if (unlikely(bt->trace_state != Blktrace_running))
				100	return;
				101
				102	what \|= ddir_act[rw & WRITE];
				103	what \|= bio_act[trace_barrier_bit(rw)];
				104	what \|= bio_act[trace_sync_bit(rw)];
Nathan Scott	40359cc	2006-07-06 10:03:28 +0200	[diff] [blame]	105	what \|= bio_act[trace_ahead_bit(rw)];
Jens Axboe	2056a78	2006-03-23 20:00:26 +0100	[diff] [blame]	106
				107	pid = tsk->pid;
				108	if (unlikely(act_log_check(bt, what, sector, pid)))
				109	return;
				110
				111	/*
				112	* A word about the locking here - we disable interrupts to reserve
				113	* some space in the relay per-cpu buffer, to prevent an irq
				114	* from coming in and stepping on our toes. Once reserved, it's
				115	* enough to get preemption disabled to prevent read of this data
				116	* before we are through filling it. get_cpu()/put_cpu() does this
				117	* for us
				118	*/
				119	local_irq_save(flags);
				120
				121	if (unlikely(tsk->btrace_seq != blktrace_seq))
				122	trace_note_tsk(bt, tsk);
				123
				124	t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
				125	if (t) {
				126	cpu = smp_processor_id();
				127	sequence = per_cpu_ptr(bt->sequence, cpu);
				128
				129	t->magic = BLK_IO_TRACE_MAGIC \| BLK_IO_TRACE_VERSION;
				130	t->sequence = ++(*sequence);
				131	t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
				132	t->sector = sector;
				133	t->bytes = bytes;
				134	t->action = what;
				135	t->pid = pid;
				136	t->device = bt->dev;
				137	t->cpu = cpu;
				138	t->error = error;
				139	t->pdu_len = pdu_len;
				140
				141	if (pdu_len)
				142	memcpy((void ) t + sizeof(t), pdu_data, pdu_len);
				143	}
				144
				145	local_irq_restore(flags);
				146	}
				147
				148	EXPORT_SYMBOL_GPL(__blk_add_trace);
				149
				150	static struct dentry *blk_tree_root;
				151	static struct mutex blk_tree_mutex;
				152	static unsigned int root_users;
				153
				154	static inline void blk_remove_root(void)
				155	{
				156	if (blk_tree_root) {
				157	debugfs_remove(blk_tree_root);
				158	blk_tree_root = NULL;
				159	}
				160	}
				161
				162	static void blk_remove_tree(struct dentry *dir)
				163	{
				164	mutex_lock(&blk_tree_mutex);
				165	debugfs_remove(dir);
				166	if (--root_users == 0)
				167	blk_remove_root();
				168	mutex_unlock(&blk_tree_mutex);
				169	}
				170
				171	static struct dentry blk_create_tree(const char blk_name)
				172	{
				173	struct dentry *dir = NULL;
				174
				175	mutex_lock(&blk_tree_mutex);
				176
				177	if (!blk_tree_root) {
				178	blk_tree_root = debugfs_create_dir("block", NULL);
				179	if (!blk_tree_root)
				180	goto err;
				181	}
				182
				183	dir = debugfs_create_dir(blk_name, blk_tree_root);
				184	if (dir)
				185	root_users++;
				186	else
				187	blk_remove_root();
				188
				189	err:
				190	mutex_unlock(&blk_tree_mutex);
				191	return dir;
				192	}
				193
				194	static void blk_trace_cleanup(struct blk_trace *bt)
				195	{
				196	relay_close(bt->rchan);
				197	debugfs_remove(bt->dropped_file);
				198	blk_remove_tree(bt->dir);
				199	free_percpu(bt->sequence);
				200	kfree(bt);
				201	}
				202
				203	static int blk_trace_remove(request_queue_t *q)
				204	{
				205	struct blk_trace *bt;
				206
				207	bt = xchg(&q->blk_trace, NULL);
				208	if (!bt)
				209	return -EINVAL;
				210
				211	if (bt->trace_state == Blktrace_setup \|\|
				212	bt->trace_state == Blktrace_stopped)
				213	blk_trace_cleanup(bt);
				214
				215	return 0;
				216	}
				217
				218	static int blk_dropped_open(struct inode inode, struct file filp)
				219	{
				220	filp->private_data = inode->u.generic_ip;
				221
				222	return 0;
				223	}
				224
				225	static ssize_t blk_dropped_read(struct file filp, char __user buffer,
				226	size_t count, loff_t *ppos)
				227	{
				228	struct blk_trace *bt = filp->private_data;
				229	char buf[16];
				230
				231	snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
				232
				233	return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
				234	}
				235
				236	static struct file_operations blk_dropped_fops = {
				237	.owner = THIS_MODULE,
				238	.open = blk_dropped_open,
				239	.read = blk_dropped_read,
				240	};
				241
				242	/*
				243	* Keep track of how many times we encountered a full subbuffer, to aid
				244	* the user space app in telling how many lost events there were.
				245	*/
				246	static int blk_subbuf_start_callback(struct rchan_buf buf, void subbuf,
				247	void *prev_subbuf, size_t prev_padding)
				248	{
				249	struct blk_trace *bt;
				250
				251	if (!relay_buf_full(buf))
				252	return 1;
				253
				254	bt = buf->chan->private_data;
				255	atomic_inc(&bt->dropped);
				256	return 0;
				257	}
				258
				/*
				 * relay callback: remove the debugfs file backing a channel buffer.
				 * Always reports success (0).
				 */
				static int blk_remove_buf_file_callback(struct dentry *dentry)
				{
					debugfs_remove(dentry);

					return 0;
				}
				264
				265	static struct dentry blk_create_buf_file_callback(const char filename,
				266	struct dentry *parent,
				267	int mode,
				268	struct rchan_buf *buf,
				269	int *is_global)
				270	{
				271	return debugfs_create_file(filename, mode, parent, buf,
				272	&relay_file_operations);
				273	}
				274
				275	static struct rchan_callbacks blk_relay_callbacks = {
				276	.subbuf_start = blk_subbuf_start_callback,
				277	.create_buf_file = blk_create_buf_file_callback,
				278	.remove_buf_file = blk_remove_buf_file_callback,
				279	};
				280
				281	/*
				282	* Setup everything required to start tracing
				283	*/
				284	static int blk_trace_setup(request_queue_t q, struct block_device bdev,
				285	char __user *arg)
				286	{
				287	struct blk_user_trace_setup buts;
				288	struct blk_trace old_bt, bt = NULL;
				289	struct dentry *dir = NULL;
				290	char b[BDEVNAME_SIZE];
				291	int ret, i;
				292
				293	if (copy_from_user(&buts, arg, sizeof(buts)))
				294	return -EFAULT;
				295
				296	if (!buts.buf_size \|\| !buts.buf_nr)
				297	return -EINVAL;
				298
				299	strcpy(buts.name, bdevname(bdev, b));
				300
				301	/*
				302	* some device names have larger paths - convert the slashes
				303	* to underscores for this to work as expected
				304	*/
				305	for (i = 0; i < strlen(buts.name); i++)
				306	if (buts.name[i] == '/')
				307	buts.name[i] = '_';
				308
				309	if (copy_to_user(arg, &buts, sizeof(buts)))
				310	return -EFAULT;
				311
				312	ret = -ENOMEM;
				313	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
				314	if (!bt)
				315	goto err;
				316
				317	bt->sequence = alloc_percpu(unsigned long);
				318	if (!bt->sequence)
				319	goto err;
				320
				321	ret = -ENOENT;
				322	dir = blk_create_tree(buts.name);
				323	if (!dir)
				324	goto err;
				325
				326	bt->dir = dir;
				327	bt->dev = bdev->bd_dev;
				328	atomic_set(&bt->dropped, 0);
				329
				330	ret = -EIO;
				331	bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
				332	if (!bt->dropped_file)
				333	goto err;
				334
				335	bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
				336	if (!bt->rchan)
				337	goto err;
				338	bt->rchan->private_data = bt;
				339
				340	bt->act_mask = buts.act_mask;
				341	if (!bt->act_mask)
				342	bt->act_mask = (u16) -1;
				343
				344	bt->start_lba = buts.start_lba;
				345	bt->end_lba = buts.end_lba;
				346	if (!bt->end_lba)
				347	bt->end_lba = -1ULL;
				348
				349	bt->pid = buts.pid;
				350	bt->trace_state = Blktrace_setup;
				351
				352	ret = -EBUSY;
				353	old_bt = xchg(&q->blk_trace, bt);
				354	if (old_bt) {
				355	(void) xchg(&q->blk_trace, old_bt);
				356	goto err;
				357	}
				358
				359	return 0;
				360	err:
				361	if (dir)
				362	blk_remove_tree(dir);
				363	if (bt) {
				364	if (bt->dropped_file)
				365	debugfs_remove(bt->dropped_file);
				366	if (bt->sequence)
				367	free_percpu(bt->sequence);
				368	if (bt->rchan)
				369	relay_close(bt->rchan);
				370	kfree(bt);
				371	}
				372	return ret;
				373	}
				374
				375	static int blk_trace_startstop(request_queue_t *q, int start)
				376	{
				377	struct blk_trace *bt;
				378	int ret;
				379
				380	if ((bt = q->blk_trace) == NULL)
				381	return -EINVAL;
				382
				383	/*
				384	* For starting a trace, we can transition from a setup or stopped
				385	* trace. For stopping a trace, the state must be running
				386	*/
				387	ret = -EINVAL;
				388	if (start) {
				389	if (bt->trace_state == Blktrace_setup \|\|
				390	bt->trace_state == Blktrace_stopped) {
				391	blktrace_seq++;
				392	smp_mb();
				393	bt->trace_state = Blktrace_running;
				394	ret = 0;
				395	}
				396	} else {
				397	if (bt->trace_state == Blktrace_running) {
				398	bt->trace_state = Blktrace_stopped;
				399	relay_flush(bt->rchan);
				400	ret = 0;
				401	}
				402	}
				403
				404	return ret;
				405	}
				406
				407	/**
				408	* blk_trace_ioctl: - handle the ioctls associated with tracing
				409	* @bdev: the block device
				410	* @cmd: the ioctl cmd
				411	* @arg: the argument data, if any
				412	*
				413	**/
				414	int blk_trace_ioctl(struct block_device bdev, unsigned cmd, char __user arg)
				415	{
				416	request_queue_t *q;
				417	int ret, start = 0;
				418
				419	q = bdev_get_queue(bdev);
				420	if (!q)
				421	return -ENXIO;
				422
				423	mutex_lock(&bdev->bd_mutex);
				424
				425	switch (cmd) {
				426	case BLKTRACESETUP:
				427	ret = blk_trace_setup(q, bdev, arg);
				428	break;
				429	case BLKTRACESTART:
				430	start = 1;
				431	case BLKTRACESTOP:
				432	ret = blk_trace_startstop(q, start);
				433	break;
				434	case BLKTRACETEARDOWN:
				435	ret = blk_trace_remove(q);
				436	break;
				437	default:
				438	ret = -ENOTTY;
				439	break;
				440	}
				441
				442	mutex_unlock(&bdev->bd_mutex);
				443	return ret;
				444	}
				445
				446	/**
				447	* blk_trace_shutdown: - stop and cleanup trace structures
				448	* @q: the request queue associated with the device
				449	*
				450	**/
				451	void blk_trace_shutdown(request_queue_t *q)
				452	{
				453	blk_trace_startstop(q, 0);
				454	blk_trace_remove(q);
				455	}
				456
				457	/*
				458	* Average offset over two calls to sched_clock() with a gettimeofday()
				459	* in the middle
				460	*/
				461	static void blk_check_time(unsigned long long *t)
				462	{
				463	unsigned long long a, b;
				464	struct timeval tv;
				465
				466	a = sched_clock();
				467	do_gettimeofday(&tv);
				468	b = sched_clock();
				469
				470	t = tv.tv_sec 1000000000 + tv.tv_usec * 1000;
				471	*t -= (a + b) / 2;
				472	}
				473
				474	static void blk_trace_check_cpu_time(void *data)
				475	{
				476	unsigned long long *t;
				477	int cpu = get_cpu();
				478
				479	t = &per_cpu(blk_trace_cpu_offset, cpu);
				480
				481	/*
				482	* Just call it twice, hopefully the second call will be cache hot
				483	* and a little more precise
				484	*/
				485	blk_check_time(t);
				486	blk_check_time(t);
				487
				488	put_cpu();
				489	}
				490
				491	/*
				492	* Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
				493	* timings
				494	*/
				495	static void blk_trace_calibrate_offsets(void)
				496	{
				497	unsigned long flags;
				498
				499	smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
				500	local_irq_save(flags);
				501	blk_trace_check_cpu_time(NULL);
				502	local_irq_restore(flags);
				503	}
				504
				/*
				 * Copy each online CPU's time offset to all of its siblings so that they
				 * share one value. Compiles to an empty function unless CONFIG_SCHED_SMT
				 * is defined.
				 */
				static void blk_trace_set_ht_offsets(void)
				{
				#if defined(CONFIG_SCHED_SMT)
					int cpu, i;

					/*
					 * now make sure HT siblings have the same time offset
					 */
					preempt_disable();
					for_each_online_cpu(cpu) {
						unsigned long long *cpu_off, *sibling_off;

						for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
							if (i == cpu)
								continue;

							cpu_off = &per_cpu(blk_trace_cpu_offset, cpu);
							sibling_off = &per_cpu(blk_trace_cpu_offset, i);
							*sibling_off = *cpu_off;
						}
					}
					preempt_enable();
				#endif
				}
				529
				530	static __init int blk_trace_init(void)
				531	{
				532	mutex_init(&blk_tree_mutex);
				533	blk_trace_calibrate_offsets();
				534	blk_trace_set_ht_offsets();
				535
				536	return 0;
				537	}
				538
				539	module_init(blk_trace_init);
				540