Blame - fs/iomap.c - kernel/msm-4.9

blob: 56c19e617a265370cb395e939e78f1772eb49f10 [file] [log] [blame]

Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	1	/*
				2	* Copyright (C) 2010 Red Hat, Inc.
				3	* Copyright (c) 2016 Christoph Hellwig.
				4	*
				5	* This program is free software; you can redistribute it and/or modify it
				6	* under the terms and conditions of the GNU General Public License,
				7	* version 2, as published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope it will be useful, but WITHOUT
				10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
				12	* more details.
				13	*/
				14	#include <linux/module.h>
				15	#include <linux/compiler.h>
				16	#include <linux/fs.h>
				17	#include <linux/iomap.h>
				18	#include <linux/uaccess.h>
				19	#include <linux/gfp.h>
				20	#include <linux/mm.h>
				21	#include <linux/swap.h>
				22	#include <linux/pagemap.h>
				23	#include <linux/file.h>
				24	#include <linux/uio.h>
				25	#include <linux/backing-dev.h>
				26	#include <linux/buffer_head.h>
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	27	#include <linux/dax.h>
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	28	#include "internal.h"
				29
				30	typedef loff_t (iomap_actor_t)(struct inode inode, loff_t pos, loff_t len,
				31	void data, struct iomap iomap);
				32
				33	/*
				34	* Execute a iomap write on a segment of the mapping that spans a
				35	* contiguous range of pages that have identical block mapping state.
				36	*
				37	* This avoids the need to map pages individually, do individual allocations
				38	* for each page and most importantly avoid the need for filesystem specific
				39	* locking per page. Instead, all the operations are amortised over the entire
				40	* range of pages. It is assumed that the filesystems will lock whatever
				41	* resources they require in the iomap_begin call, and release them in the
				42	* iomap_end call.
				43	*/
				44	static loff_t
				45	iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
				46	struct iomap_ops ops, void data, iomap_actor_t actor)
				47	{
				48	struct iomap iomap = { 0 };
				49	loff_t written = 0, ret;
				50
				51	/*
				52	* Need to map a range from start position for length bytes. This can
				53	* span multiple pages - it is only guaranteed to return a range of a
				54	* single type of pages (e.g. all into a hole, all mapped or all
				55	* unwritten). Failure at this point has nothing to undo.
				56	*
				57	* If allocation is required for this range, reserve the space now so
				58	* that the allocation is guaranteed to succeed later on. Once we copy
				59	* the data into the page cache pages, then we cannot fail otherwise we
				60	* expose transient stale data. If the reserve fails, we can safely
				61	* back out at this point as there is nothing to undo.
				62	*/
				63	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
				64	if (ret)
				65	return ret;
				66	if (WARN_ON(iomap.offset > pos))
				67	return -EIO;
				68
				69	/*
				70	* Cut down the length to the one actually provided by the filesystem,
				71	* as it might not be able to give us the whole size that we requested.
				72	*/
				73	if (iomap.offset + iomap.length < pos + length)
				74	length = iomap.offset + iomap.length - pos;
				75
				76	/*
				77	* Now that we have guaranteed that the space allocation will succeed.
				78	* we can do the copy-in page by page without having to worry about
				79	* failures exposing transient data.
				80	*/
				81	written = actor(inode, pos, length, data, &iomap);
				82
				83	/*
				84	* Now the data has been copied, commit the range we've copied. This
				85	* should not fail unless the filesystem has had a fatal error.
				86	*/
				87	ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
				88	flags, &iomap);
				89
				90	return written ? written : ret;
				91	}
				92
				93	static void
				94	iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
				95	{
				96	loff_t i_size = i_size_read(inode);
				97
				98	/*
				99	* Only truncate newly allocated pages beyoned EOF, even if the
				100	* write started inside the existing inode size.
				101	*/
				102	if (pos + len > i_size)
				103	truncate_pagecache_range(inode, max(pos, i_size), pos + len);
				104	}
				105
				106	static int
				107	iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
				108	struct page *pagep, struct iomap iomap)
				109	{
				110	pgoff_t index = pos >> PAGE_SHIFT;
				111	struct page *page;
				112	int status = 0;
				113
				114	BUG_ON(pos + len > iomap->offset + iomap->length);
				115
				116	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
				117	if (!page)
				118	return -ENOMEM;
				119
				120	status = __block_write_begin_int(page, pos, len, NULL, iomap);
				121	if (unlikely(status)) {
				122	unlock_page(page);
				123	put_page(page);
				124	page = NULL;
				125
				126	iomap_write_failed(inode, pos, len);
				127	}
				128
				129	*pagep = page;
				130	return status;
				131	}
				132
				133	static int
				134	iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
				135	unsigned copied, struct page *page)
				136	{
				137	int ret;
				138
				139	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
				140	copied, page, NULL);
				141	if (ret < len)
				142	iomap_write_failed(inode, pos, len);
				143	return ret;
				144	}
				145
				146	static loff_t
				147	iomap_write_actor(struct inode inode, loff_t pos, loff_t length, void data,
				148	struct iomap *iomap)
				149	{
				150	struct iov_iter *i = data;
				151	long status = 0;
				152	ssize_t written = 0;
				153	unsigned int flags = AOP_FLAG_NOFS;
				154
				155	/*
				156	* Copies from kernel address space cannot fail (NFSD is a big user).
				157	*/
				158	if (!iter_is_iovec(i))
				159	flags \|= AOP_FLAG_UNINTERRUPTIBLE;
				160
				161	do {
				162	struct page *page;
				163	unsigned long offset; /* Offset into pagecache page */
				164	unsigned long bytes; /* Bytes to write to page */
				165	size_t copied; /* Bytes copied from user */
				166
				167	offset = (pos & (PAGE_SIZE - 1));
				168	bytes = min_t(unsigned long, PAGE_SIZE - offset,
				169	iov_iter_count(i));
				170	again:
				171	if (bytes > length)
				172	bytes = length;
				173
				174	/*
				175	* Bring in the user page that we will copy from _first_.
				176	* Otherwise there's a nasty deadlock on copying from the
				177	* same page as we're writing to, without it being marked
				178	* up-to-date.
				179	*
				180	* Not only is this an optimisation, but it is also required
				181	* to check that the address is actually valid, when atomic
				182	* usercopies are used, below.
				183	*/
				184	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
				185	status = -EFAULT;
				186	break;
				187	}
				188
				189	status = iomap_write_begin(inode, pos, bytes, flags, &page,
				190	iomap);
				191	if (unlikely(status))
				192	break;
				193
				194	if (mapping_writably_mapped(inode->i_mapping))
				195	flush_dcache_page(page);
				196
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	197	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	198
				199	flush_dcache_page(page);
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	200
				201	status = iomap_write_end(inode, pos, bytes, copied, page);
				202	if (unlikely(status < 0))
				203	break;
				204	copied = status;
				205
				206	cond_resched();
				207
				208	iov_iter_advance(i, copied);
				209	if (unlikely(copied == 0)) {
				210	/*
				211	* If we were unable to copy any data at all, we must
				212	* fall back to a single segment length write.
				213	*
				214	* If we didn't fallback here, we could livelock
				215	* because not all segments in the iov can be copied at
				216	* once without a pagefault.
				217	*/
				218	bytes = min_t(unsigned long, PAGE_SIZE - offset,
				219	iov_iter_single_seg_count(i));
				220	goto again;
				221	}
				222	pos += copied;
				223	written += copied;
				224	length -= copied;
				225
				226	balance_dirty_pages_ratelimited(inode->i_mapping);
				227	} while (iov_iter_count(i) && length);
				228
				229	return written ? written : status;
				230	}
				231
				232	ssize_t
				233	iomap_file_buffered_write(struct kiocb iocb, struct iov_iter iter,
				234	struct iomap_ops *ops)
				235	{
				236	struct inode *inode = iocb->ki_filp->f_mapping->host;
				237	loff_t pos = iocb->ki_pos, ret = 0, written = 0;
				238
				239	while (iov_iter_count(iter)) {
				240	ret = iomap_apply(inode, pos, iov_iter_count(iter),
				241	IOMAP_WRITE, ops, iter, iomap_write_actor);
				242	if (ret <= 0)
				243	break;
				244	pos += ret;
				245	written += ret;
				246	}
				247
				248	return written ? written : ret;
				249	}
				250	EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
				251
				252	static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
				253	unsigned bytes, struct iomap *iomap)
				254	{
				255	struct page *page;
				256	int status;
				257
				258	status = iomap_write_begin(inode, pos, bytes,
				259	AOP_FLAG_UNINTERRUPTIBLE \| AOP_FLAG_NOFS, &page, iomap);
				260	if (status)
				261	return status;
				262
				263	zero_user(page, offset, bytes);
				264	mark_page_accessed(page);
				265
				266	return iomap_write_end(inode, pos, bytes, bytes, page);
				267	}
				268
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	269	static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
				270	struct iomap *iomap)
				271	{
				272	sector_t sector = iomap->blkno +
				273	(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
				274
				275	return __dax_zero_page_range(iomap->bdev, sector, offset, bytes);
				276	}
				277
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	278	static loff_t
				279	iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
				280	void data, struct iomap iomap)
				281	{
				282	bool *did_zero = data;
				283	loff_t written = 0;
				284	int status;
				285
				286	/* already zeroed? we're done. */
				287	if (iomap->type == IOMAP_HOLE \|\| iomap->type == IOMAP_UNWRITTEN)
				288	return count;
				289
				290	do {
				291	unsigned offset, bytes;
				292
				293	offset = pos & (PAGE_SIZE - 1); /* Within page */
				294	bytes = min_t(unsigned, PAGE_SIZE - offset, count);
				295
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	296	if (IS_DAX(inode))
				297	status = iomap_dax_zero(pos, offset, bytes, iomap);
				298	else
				299	status = iomap_zero(inode, pos, offset, bytes, iomap);
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	300	if (status < 0)
				301	return status;
				302
				303	pos += bytes;
				304	count -= bytes;
				305	written += bytes;
				306	if (did_zero)
				307	*did_zero = true;
				308	} while (count > 0);
				309
				310	return written;
				311	}
				312
				313	int
				314	iomap_zero_range(struct inode inode, loff_t pos, loff_t len, bool did_zero,
				315	struct iomap_ops *ops)
				316	{
				317	loff_t ret;
				318
				319	while (len > 0) {
				320	ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
				321	ops, did_zero, iomap_zero_range_actor);
				322	if (ret <= 0)
				323	return ret;
				324
				325	pos += ret;
				326	len -= ret;
				327	}
				328
				329	return 0;
				330	}
				331	EXPORT_SYMBOL_GPL(iomap_zero_range);
				332
				333	int
				334	iomap_truncate_page(struct inode inode, loff_t pos, bool did_zero,
				335	struct iomap_ops *ops)
				336	{
				337	unsigned blocksize = (1 << inode->i_blkbits);
				338	unsigned off = pos & (blocksize - 1);
				339
				340	/* Block boundary? Nothing to do */
				341	if (!off)
				342	return 0;
				343	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
				344	}
				345	EXPORT_SYMBOL_GPL(iomap_truncate_page);
				346
				347	static loff_t
				348	iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
				349	void data, struct iomap iomap)
				350	{
				351	struct page *page = data;
				352	int ret;
				353
				354	ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
				355	NULL, iomap);
				356	if (ret)
				357	return ret;
				358
				359	block_commit_write(page, 0, length);
				360	return length;
				361	}
				362
				363	int iomap_page_mkwrite(struct vm_area_struct vma, struct vm_fault vmf,
				364	struct iomap_ops *ops)
				365	{
				366	struct page *page = vmf->page;
				367	struct inode *inode = file_inode(vma->vm_file);
				368	unsigned long length;
				369	loff_t offset, size;
				370	ssize_t ret;
				371
				372	lock_page(page);
				373	size = i_size_read(inode);
				374	if ((page->mapping != inode->i_mapping) \|\|
				375	(page_offset(page) > size)) {
				376	/* We overload EFAULT to mean page got truncated */
				377	ret = -EFAULT;
				378	goto out_unlock;
				379	}
				380
				381	/* page is wholly or partially inside EOF */
				382	if (((page->index + 1) << PAGE_SHIFT) > size)
				383	length = size & ~PAGE_MASK;
				384	else
				385	length = PAGE_SIZE;
				386
				387	offset = page_offset(page);
				388	while (length > 0) {
				389	ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
				390	ops, page, iomap_page_mkwrite_actor);
				391	if (unlikely(ret <= 0))
				392	goto out_unlock;
				393	offset += ret;
				394	length -= ret;
				395	}
				396
				397	set_page_dirty(page);
				398	wait_for_stable_page(page);
				399	return 0;
				400	out_unlock:
				401	unlock_page(page);
				402	return ret;
				403	}
				404	EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
Christoph Hellwig	8be9f56	2016-06-21 09:38:45 +1000	[diff] [blame]	405
				406	struct fiemap_ctx {
				407	struct fiemap_extent_info *fi;
				408	struct iomap prev;
				409	};
				410
				411	static int iomap_to_fiemap(struct fiemap_extent_info *fi,
				412	struct iomap *iomap, u32 flags)
				413	{
				414	switch (iomap->type) {
				415	case IOMAP_HOLE:
				416	/* skip holes */
				417	return 0;
				418	case IOMAP_DELALLOC:
				419	flags \|= FIEMAP_EXTENT_DELALLOC \| FIEMAP_EXTENT_UNKNOWN;
				420	break;
				421	case IOMAP_UNWRITTEN:
				422	flags \|= FIEMAP_EXTENT_UNWRITTEN;
				423	break;
				424	case IOMAP_MAPPED:
				425	break;
				426	}
				427
				428	return fiemap_fill_next_extent(fi, iomap->offset,
				429	iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
				430	iomap->length, flags \| FIEMAP_EXTENT_MERGED);
				431
				432	}
				433
				434	static loff_t
				435	iomap_fiemap_actor(struct inode inode, loff_t pos, loff_t length, void data,
				436	struct iomap *iomap)
				437	{
				438	struct fiemap_ctx *ctx = data;
				439	loff_t ret = length;
				440
				441	if (iomap->type == IOMAP_HOLE)
				442	return length;
				443
				444	ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
				445	ctx->prev = *iomap;
				446	switch (ret) {
				447	case 0: /* success */
				448	return length;
				449	case 1: /* extent array full */
				450	return 0;
				451	default:
				452	return ret;
				453	}
				454	}
				455
				456	int iomap_fiemap(struct inode inode, struct fiemap_extent_info fi,
				457	loff_t start, loff_t len, struct iomap_ops *ops)
				458	{
				459	struct fiemap_ctx ctx;
				460	loff_t ret;
				461
				462	memset(&ctx, 0, sizeof(ctx));
				463	ctx.fi = fi;
				464	ctx.prev.type = IOMAP_HOLE;
				465
				466	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
				467	if (ret)
				468	return ret;
				469
Dave Chinner	8896b8f	2016-08-17 08:41:10 +1000	[diff] [blame^]	470	if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
				471	ret = filemap_write_and_wait(inode->i_mapping);
				472	if (ret)
				473	return ret;
				474	}
Christoph Hellwig	8be9f56	2016-06-21 09:38:45 +1000	[diff] [blame]	475
				476	while (len > 0) {
				477	ret = iomap_apply(inode, start, len, 0, ops, &ctx,
				478	iomap_fiemap_actor);
				479	if (ret < 0)
				480	return ret;
				481	if (ret == 0)
				482	break;
				483
				484	start += ret;
				485	len -= ret;
				486	}
				487
				488	if (ctx.prev.type != IOMAP_HOLE) {
				489	ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
				490	if (ret < 0)
				491	return ret;
				492	}
				493
				494	return 0;
				495	}
				496	EXPORT_SYMBOL_GPL(iomap_fiemap);