/*
 * fs/dax.c - Direct Access filesystem code
 * Copyright (c) 2013-2014 Intel Corporation
 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/atomic.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/uio.h>

/*
 * Zero @size bytes of the block device backing @inode, starting at
 * @block, going through the driver's direct_access method a chunk at a
 * time.  Filesystems use this when handing out newly allocated blocks,
 * since with DAX there is no page cache to hide stale media contents
 * from a subsequent mmap.
 */
int dax_clear_blocks(struct inode *inode, sector_t block, long size)
{
	struct block_device *bdev = inode->i_sb->s_bdev;
	sector_t sector = block << (inode->i_blkbits - 9);

	might_sleep();
	do {
		void *addr;
		unsigned long pfn;
		long count;

		count = bdev_direct_access(bdev, sector, &addr, &pfn, size);
		if (count < 0)
			return count;
		BUG_ON(size < count);
		while (count > 0) {
			/* Zero at most one page per iteration, then yield */
			unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
			if (pgsz > count)
				pgsz = count;
			if (pgsz < PAGE_SIZE)
				memset(addr, 0, pgsz);
			else
				clear_page(addr);
			addr += pgsz;
			size -= pgsz;
			count -= pgsz;
			BUG_ON(pgsz & 511);
			sector += pgsz / 512;
			cond_resched();
		}
	} while (size);

	return 0;
}
EXPORT_SYMBOL_GPL(dax_clear_blocks);
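
/*
 * Hedged sketch (not part of this file): roughly how a filesystem's
 * block-allocation path might call dax_clear_blocks() to scrub a block
 * before exposing it to userspace.  "new_block" and the "cleanup" label
 * are placeholders for the filesystem's own names:
 *
 *	if (IS_DAX(inode)) {
 *		err = dax_clear_blocks(inode, new_block,
 *				       1 << inode->i_blkbits);
 *		if (err)
 *			goto cleanup;
 *	}
 */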

static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
{
	unsigned long pfn;	/* unused; we only need the kernel mapping */
	sector_t sector = bh->b_blocknr << (blkbits - 9);
	return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
}

/*
 * Zero the parts of a freshly allocated (or unwritten) buffer that the
 * current I/O will not overwrite: everything before @first, and
 * everything from the end of the I/O to the end of the buffer.
 */
static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
			loff_t end)
{
	loff_t final = end - pos + first; /* The final byte of the buffer */

	if (first > 0)
		memset(addr, 0, first);
	if (final < size)
		memset(addr + final, 0, size - final);
}
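
/*
 * Worked example: for a 4096-byte buffer (size == 4096) where the I/O
 * covers bytes 512..1023 of the block (first == 512, end - pos == 512,
 * so final == 1024), dax_new_buf() zeroes bytes 0..511 and 1024..4095,
 * leaving only the range the caller is about to fill untouched.
 */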

static bool buffer_written(struct buffer_head *bh)
{
	return buffer_mapped(bh) && !buffer_unwritten(bh);
}

/*
 * When ext4 encounters a hole, it returns without modifying the buffer_head
 * which means that we can't trust b_size.  To cope with this, we set b_state
 * to 0 before calling get_block and, if any bit is set, we know we can trust
 * b_size.  Unfortunate, really, since ext4 knows precisely how long a hole is
 * and would save us time calling get_block repeatedly.
 */
static bool buffer_size_valid(struct buffer_head *bh)
{
	return bh->b_state != 0;
}
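
/*
 * Illustration only (the same pattern appears in dax_io() below): a
 * caller zeroes b_state before get_block() so that a filesystem which
 * leaves the buffer_head untouched on a hole can be detected:
 *
 *	bh->b_state = 0;
 *	if (get_block(inode, block, bh, 0) == 0 &&
 *	    !buffer_size_valid(bh))
 *		bh->b_size = 1 << inode->i_blkbits;	// assume one block
 */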

/*
 * The inner loop of dax_do_io().  @pos tracks our progress through the
 * range, @max marks the end of the extent we are currently copying
 * to/from, and @bh_max marks the end of the extent described by @bh, so
 * get_block() is only called once we have consumed everything it last
 * mapped for us.
 */
static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
			loff_t start, loff_t end, get_block_t get_block,
			struct buffer_head *bh)
{
	ssize_t retval = 0;
	loff_t pos = start;
	loff_t max = start;
	loff_t bh_max = start;
	void *addr;
	bool hole = false;

	if (rw != WRITE)
		end = min(end, i_size_read(inode));	/* don't read past EOF */

	while (pos < end) {
		unsigned len;
		if (pos == max) {
			unsigned blkbits = inode->i_blkbits;
			sector_t block = pos >> blkbits;
			unsigned first = pos - (block << blkbits);
			long size;

			if (pos == bh_max) {
				bh->b_size = PAGE_ALIGN(end - pos);
				bh->b_state = 0;
				retval = get_block(inode, block, bh,
						 rw == WRITE);
				if (retval)
					break;
				if (!buffer_size_valid(bh))
					bh->b_size = 1 << blkbits;
				bh_max = pos - first + bh->b_size;
			} else {
				/* Still inside the last mapping; advance it */
				unsigned done = bh->b_size -
						(bh_max - (pos - first));
				bh->b_blocknr += done >> blkbits;
				bh->b_size -= done;
			}

			hole = (rw != WRITE) && !buffer_written(bh);
			if (hole) {
				addr = NULL;
				size = bh->b_size - first;
			} else {
				retval = dax_get_addr(bh, &addr, blkbits);
				if (retval < 0)
					break;
				if (buffer_unwritten(bh) || buffer_new(bh))
					dax_new_buf(addr, retval, first, pos,
									end);
				addr += first;
				size = retval - first;
			}
			max = min(pos + size, end);
		}

		if (rw == WRITE)
			len = copy_from_iter(addr, max - pos, iter);
		else if (!hole)
			len = copy_to_iter(addr, max - pos, iter);
		else
			len = iov_iter_zero(max - pos, iter);

		if (!len)
			break;

		pos += len;
		addr += len;
	}

	return (pos == start) ? retval : pos - start;
}

/**
 * dax_do_io - Perform I/O to a DAX file
 * @rw: READ to read or WRITE to write
 * @iocb: The control block for this I/O
 * @inode: The file which the I/O is directed at
 * @iter: The addresses to do I/O from or to
 * @pos: The file offset where the I/O starts
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @end_io: A filesystem callback for I/O completion
 * @flags: See below
 *
 * This function uses the same locking scheme as do_blockdev_direct_IO:
 * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
 * caller for writes.  For reads, we take and release the i_mutex ourselves.
 * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
 * is in progress.
 */
ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
		struct iov_iter *iter, loff_t pos,
		get_block_t get_block, dio_iodone_t end_io, int flags)
{
	struct buffer_head bh;
	ssize_t retval = -EINVAL;
	loff_t end = pos + iov_iter_count(iter);

	memset(&bh, 0, sizeof(bh));

	if ((flags & DIO_LOCKING) && (rw == READ)) {
		struct address_space *mapping = inode->i_mapping;
		mutex_lock(&inode->i_mutex);
		/* Flush any dirty page cache pages overlapping this read */
		retval = filemap_write_and_wait_range(mapping, pos, end - 1);
		if (retval) {
			mutex_unlock(&inode->i_mutex);
			goto out;
		}
	}

	/* Protects against truncate */
	atomic_inc(&inode->i_dio_count);

	retval = dax_io(rw, inode, iter, pos, end, get_block, &bh);

	if ((flags & DIO_LOCKING) && (rw == READ))
		mutex_unlock(&inode->i_mutex);

	if ((retval > 0) && end_io)
		end_io(iocb, pos, retval, bh.b_private);

	inode_dio_done(inode);
 out:
	return retval;
}
EXPORT_SYMBOL_GPL(dax_do_io);
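
/*
 * Hedged sketch (not part of this file): wiring dax_do_io() into a
 * filesystem's ->direct_IO method, loosely modelled on how ext2 used it
 * in this era.  "myfs_direct_IO" and "myfs_get_block" are placeholders
 * for the filesystem's own names:
 *
 *	static ssize_t myfs_direct_IO(int rw, struct kiocb *iocb,
 *			struct iov_iter *iter, loff_t offset)
 *	{
 *		struct inode *inode = file_inode(iocb->ki_filp);
 *
 *		if (IS_DAX(inode))
 *			return dax_do_io(rw, iocb, inode, iter, offset,
 *					 myfs_get_block, NULL, DIO_LOCKING);
 *		return blockdev_direct_IO(rw, iocb, inode, iter, offset,
 *					  myfs_get_block);
 *	}
 */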