Blame - fs/iomap.c - kernel/msm-4.9

blob: 48141b8eff5f4f799f804674c83173863c156b2c [file] [log] [blame]

Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	1	/*
				2	* Copyright (C) 2010 Red Hat, Inc.
				3	* Copyright (c) 2016 Christoph Hellwig.
				4	*
				5	* This program is free software; you can redistribute it and/or modify it
				6	* under the terms and conditions of the GNU General Public License,
				7	* version 2, as published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope it will be useful, but WITHOUT
				10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
				12	* more details.
				13	*/
				14	#include <linux/module.h>
				15	#include <linux/compiler.h>
				16	#include <linux/fs.h>
				17	#include <linux/iomap.h>
				18	#include <linux/uaccess.h>
				19	#include <linux/gfp.h>
				20	#include <linux/mm.h>
				21	#include <linux/swap.h>
				22	#include <linux/pagemap.h>
				23	#include <linux/file.h>
				24	#include <linux/uio.h>
				25	#include <linux/backing-dev.h>
				26	#include <linux/buffer_head.h>
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	27	#include <linux/dax.h>
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	28	#include "internal.h"
				29
				30	typedef loff_t (iomap_actor_t)(struct inode inode, loff_t pos, loff_t len,
				31	void data, struct iomap iomap);
				32
				33	/*
				34	* Execute a iomap write on a segment of the mapping that spans a
				35	* contiguous range of pages that have identical block mapping state.
				36	*
				37	* This avoids the need to map pages individually, do individual allocations
				38	* for each page and most importantly avoid the need for filesystem specific
				39	* locking per page. Instead, all the operations are amortised over the entire
				40	* range of pages. It is assumed that the filesystems will lock whatever
				41	* resources they require in the iomap_begin call, and release them in the
				42	* iomap_end call.
				43	*/
				44	static loff_t
				45	iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
				46	struct iomap_ops ops, void data, iomap_actor_t actor)
				47	{
				48	struct iomap iomap = { 0 };
				49	loff_t written = 0, ret;
				50
				51	/*
				52	* Need to map a range from start position for length bytes. This can
				53	* span multiple pages - it is only guaranteed to return a range of a
				54	* single type of pages (e.g. all into a hole, all mapped or all
				55	* unwritten). Failure at this point has nothing to undo.
				56	*
				57	* If allocation is required for this range, reserve the space now so
				58	* that the allocation is guaranteed to succeed later on. Once we copy
				59	* the data into the page cache pages, then we cannot fail otherwise we
				60	* expose transient stale data. If the reserve fails, we can safely
				61	* back out at this point as there is nothing to undo.
				62	*/
				63	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
				64	if (ret)
				65	return ret;
				66	if (WARN_ON(iomap.offset > pos))
				67	return -EIO;
				68
				69	/*
				70	* Cut down the length to the one actually provided by the filesystem,
				71	* as it might not be able to give us the whole size that we requested.
				72	*/
				73	if (iomap.offset + iomap.length < pos + length)
				74	length = iomap.offset + iomap.length - pos;
				75
				76	/*
				77	* Now that we have guaranteed that the space allocation will succeed.
				78	* we can do the copy-in page by page without having to worry about
				79	* failures exposing transient data.
				80	*/
				81	written = actor(inode, pos, length, data, &iomap);
				82
				83	/*
				84	* Now the data has been copied, commit the range we've copied. This
				85	* should not fail unless the filesystem has had a fatal error.
				86	*/
				87	ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
				88	flags, &iomap);
				89
				90	return written ? written : ret;
				91	}
				92
				93	static void
				94	iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
				95	{
				96	loff_t i_size = i_size_read(inode);
				97
				98	/*
				99	* Only truncate newly allocated pages beyoned EOF, even if the
				100	* write started inside the existing inode size.
				101	*/
				102	if (pos + len > i_size)
				103	truncate_pagecache_range(inode, max(pos, i_size), pos + len);
				104	}
				105
				106	static int
				107	iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
				108	struct page *pagep, struct iomap iomap)
				109	{
				110	pgoff_t index = pos >> PAGE_SHIFT;
				111	struct page *page;
				112	int status = 0;
				113
				114	BUG_ON(pos + len > iomap->offset + iomap->length);
				115
				116	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
				117	if (!page)
				118	return -ENOMEM;
				119
				120	status = __block_write_begin_int(page, pos, len, NULL, iomap);
				121	if (unlikely(status)) {
				122	unlock_page(page);
				123	put_page(page);
				124	page = NULL;
				125
				126	iomap_write_failed(inode, pos, len);
				127	}
				128
				129	*pagep = page;
				130	return status;
				131	}
				132
				133	static int
				134	iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
				135	unsigned copied, struct page *page)
				136	{
				137	int ret;
				138
				139	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
				140	copied, page, NULL);
				141	if (ret < len)
				142	iomap_write_failed(inode, pos, len);
				143	return ret;
				144	}
				145
				146	static loff_t
				147	iomap_write_actor(struct inode inode, loff_t pos, loff_t length, void data,
				148	struct iomap *iomap)
				149	{
				150	struct iov_iter *i = data;
				151	long status = 0;
				152	ssize_t written = 0;
				153	unsigned int flags = AOP_FLAG_NOFS;
				154
				155	/*
				156	* Copies from kernel address space cannot fail (NFSD is a big user).
				157	*/
				158	if (!iter_is_iovec(i))
				159	flags \|= AOP_FLAG_UNINTERRUPTIBLE;
				160
				161	do {
				162	struct page *page;
				163	unsigned long offset; /* Offset into pagecache page */
				164	unsigned long bytes; /* Bytes to write to page */
				165	size_t copied; /* Bytes copied from user */
				166
				167	offset = (pos & (PAGE_SIZE - 1));
				168	bytes = min_t(unsigned long, PAGE_SIZE - offset,
				169	iov_iter_count(i));
				170	again:
				171	if (bytes > length)
				172	bytes = length;
				173
				174	/*
				175	* Bring in the user page that we will copy from _first_.
				176	* Otherwise there's a nasty deadlock on copying from the
				177	* same page as we're writing to, without it being marked
				178	* up-to-date.
				179	*
				180	* Not only is this an optimisation, but it is also required
				181	* to check that the address is actually valid, when atomic
				182	* usercopies are used, below.
				183	*/
				184	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
				185	status = -EFAULT;
				186	break;
				187	}
				188
				189	status = iomap_write_begin(inode, pos, bytes, flags, &page,
				190	iomap);
				191	if (unlikely(status))
				192	break;
				193
				194	if (mapping_writably_mapped(inode->i_mapping))
				195	flush_dcache_page(page);
				196
				197	pagefault_disable();
				198	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
				199	pagefault_enable();
				200
				201	flush_dcache_page(page);
				202	mark_page_accessed(page);
				203
				204	status = iomap_write_end(inode, pos, bytes, copied, page);
				205	if (unlikely(status < 0))
				206	break;
				207	copied = status;
				208
				209	cond_resched();
				210
				211	iov_iter_advance(i, copied);
				212	if (unlikely(copied == 0)) {
				213	/*
				214	* If we were unable to copy any data at all, we must
				215	* fall back to a single segment length write.
				216	*
				217	* If we didn't fallback here, we could livelock
				218	* because not all segments in the iov can be copied at
				219	* once without a pagefault.
				220	*/
				221	bytes = min_t(unsigned long, PAGE_SIZE - offset,
				222	iov_iter_single_seg_count(i));
				223	goto again;
				224	}
				225	pos += copied;
				226	written += copied;
				227	length -= copied;
				228
				229	balance_dirty_pages_ratelimited(inode->i_mapping);
				230	} while (iov_iter_count(i) && length);
				231
				232	return written ? written : status;
				233	}
				234
				235	ssize_t
				236	iomap_file_buffered_write(struct kiocb iocb, struct iov_iter iter,
				237	struct iomap_ops *ops)
				238	{
				239	struct inode *inode = iocb->ki_filp->f_mapping->host;
				240	loff_t pos = iocb->ki_pos, ret = 0, written = 0;
				241
				242	while (iov_iter_count(iter)) {
				243	ret = iomap_apply(inode, pos, iov_iter_count(iter),
				244	IOMAP_WRITE, ops, iter, iomap_write_actor);
				245	if (ret <= 0)
				246	break;
				247	pos += ret;
				248	written += ret;
				249	}
				250
				251	return written ? written : ret;
				252	}
				253	EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
				254
				255	static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
				256	unsigned bytes, struct iomap *iomap)
				257	{
				258	struct page *page;
				259	int status;
				260
				261	status = iomap_write_begin(inode, pos, bytes,
				262	AOP_FLAG_UNINTERRUPTIBLE \| AOP_FLAG_NOFS, &page, iomap);
				263	if (status)
				264	return status;
				265
				266	zero_user(page, offset, bytes);
				267	mark_page_accessed(page);
				268
				269	return iomap_write_end(inode, pos, bytes, bytes, page);
				270	}
				271
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	272	static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
				273	struct iomap *iomap)
				274	{
				275	sector_t sector = iomap->blkno +
				276	(((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9);
				277
				278	return __dax_zero_page_range(iomap->bdev, sector, offset, bytes);
				279	}
				280
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	281	static loff_t
				282	iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
				283	void data, struct iomap iomap)
				284	{
				285	bool *did_zero = data;
				286	loff_t written = 0;
				287	int status;
				288
				289	/* already zeroed? we're done. */
				290	if (iomap->type == IOMAP_HOLE \|\| iomap->type == IOMAP_UNWRITTEN)
				291	return count;
				292
				293	do {
				294	unsigned offset, bytes;
				295
				296	offset = pos & (PAGE_SIZE - 1); /* Within page */
				297	bytes = min_t(unsigned, PAGE_SIZE - offset, count);
				298
Christoph Hellwig	9a286f0	2016-06-21 09:31:39 +1000	[diff] [blame]	299	if (IS_DAX(inode))
				300	status = iomap_dax_zero(pos, offset, bytes, iomap);
				301	else
				302	status = iomap_zero(inode, pos, offset, bytes, iomap);
Christoph Hellwig	ae259a9	2016-06-21 09:23:11 +1000	[diff] [blame]	303	if (status < 0)
				304	return status;
				305
				306	pos += bytes;
				307	count -= bytes;
				308	written += bytes;
				309	if (did_zero)
				310	*did_zero = true;
				311	} while (count > 0);
				312
				313	return written;
				314	}
				315
				316	int
				317	iomap_zero_range(struct inode inode, loff_t pos, loff_t len, bool did_zero,
				318	struct iomap_ops *ops)
				319	{
				320	loff_t ret;
				321
				322	while (len > 0) {
				323	ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
				324	ops, did_zero, iomap_zero_range_actor);
				325	if (ret <= 0)
				326	return ret;
				327
				328	pos += ret;
				329	len -= ret;
				330	}
				331
				332	return 0;
				333	}
				334	EXPORT_SYMBOL_GPL(iomap_zero_range);
				335
				336	int
				337	iomap_truncate_page(struct inode inode, loff_t pos, bool did_zero,
				338	struct iomap_ops *ops)
				339	{
				340	unsigned blocksize = (1 << inode->i_blkbits);
				341	unsigned off = pos & (blocksize - 1);
				342
				343	/* Block boundary? Nothing to do */
				344	if (!off)
				345	return 0;
				346	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
				347	}
				348	EXPORT_SYMBOL_GPL(iomap_truncate_page);
				349
				350	static loff_t
				351	iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
				352	void data, struct iomap iomap)
				353	{
				354	struct page *page = data;
				355	int ret;
				356
				357	ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
				358	NULL, iomap);
				359	if (ret)
				360	return ret;
				361
				362	block_commit_write(page, 0, length);
				363	return length;
				364	}
				365
				366	int iomap_page_mkwrite(struct vm_area_struct vma, struct vm_fault vmf,
				367	struct iomap_ops *ops)
				368	{
				369	struct page *page = vmf->page;
				370	struct inode *inode = file_inode(vma->vm_file);
				371	unsigned long length;
				372	loff_t offset, size;
				373	ssize_t ret;
				374
				375	lock_page(page);
				376	size = i_size_read(inode);
				377	if ((page->mapping != inode->i_mapping) \|\|
				378	(page_offset(page) > size)) {
				379	/* We overload EFAULT to mean page got truncated */
				380	ret = -EFAULT;
				381	goto out_unlock;
				382	}
				383
				384	/* page is wholly or partially inside EOF */
				385	if (((page->index + 1) << PAGE_SHIFT) > size)
				386	length = size & ~PAGE_MASK;
				387	else
				388	length = PAGE_SIZE;
				389
				390	offset = page_offset(page);
				391	while (length > 0) {
				392	ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
				393	ops, page, iomap_page_mkwrite_actor);
				394	if (unlikely(ret <= 0))
				395	goto out_unlock;
				396	offset += ret;
				397	length -= ret;
				398	}
				399
				400	set_page_dirty(page);
				401	wait_for_stable_page(page);
				402	return 0;
				403	out_unlock:
				404	unlock_page(page);
				405	return ret;
				406	}
				407	EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
Christoph Hellwig	8be9f56	2016-06-21 09:38:45 +1000	[diff] [blame^]	408
				409	struct fiemap_ctx {
				410	struct fiemap_extent_info *fi;
				411	struct iomap prev;
				412	};
				413
				414	static int iomap_to_fiemap(struct fiemap_extent_info *fi,
				415	struct iomap *iomap, u32 flags)
				416	{
				417	switch (iomap->type) {
				418	case IOMAP_HOLE:
				419	/* skip holes */
				420	return 0;
				421	case IOMAP_DELALLOC:
				422	flags \|= FIEMAP_EXTENT_DELALLOC \| FIEMAP_EXTENT_UNKNOWN;
				423	break;
				424	case IOMAP_UNWRITTEN:
				425	flags \|= FIEMAP_EXTENT_UNWRITTEN;
				426	break;
				427	case IOMAP_MAPPED:
				428	break;
				429	}
				430
				431	return fiemap_fill_next_extent(fi, iomap->offset,
				432	iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
				433	iomap->length, flags \| FIEMAP_EXTENT_MERGED);
				434
				435	}
				436
				437	static loff_t
				438	iomap_fiemap_actor(struct inode inode, loff_t pos, loff_t length, void data,
				439	struct iomap *iomap)
				440	{
				441	struct fiemap_ctx *ctx = data;
				442	loff_t ret = length;
				443
				444	if (iomap->type == IOMAP_HOLE)
				445	return length;
				446
				447	ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
				448	ctx->prev = *iomap;
				449	switch (ret) {
				450	case 0: /* success */
				451	return length;
				452	case 1: /* extent array full */
				453	return 0;
				454	default:
				455	return ret;
				456	}
				457	}
				458
				459	int iomap_fiemap(struct inode inode, struct fiemap_extent_info fi,
				460	loff_t start, loff_t len, struct iomap_ops *ops)
				461	{
				462	struct fiemap_ctx ctx;
				463	loff_t ret;
				464
				465	memset(&ctx, 0, sizeof(ctx));
				466	ctx.fi = fi;
				467	ctx.prev.type = IOMAP_HOLE;
				468
				469	ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
				470	if (ret)
				471	return ret;
				472
				473	ret = filemap_write_and_wait(inode->i_mapping);
				474	if (ret)
				475	return ret;
				476
				477	while (len > 0) {
				478	ret = iomap_apply(inode, start, len, 0, ops, &ctx,
				479	iomap_fiemap_actor);
				480	if (ret < 0)
				481	return ret;
				482	if (ret == 0)
				483	break;
				484
				485	start += ret;
				486	len -= ret;
				487	}
				488
				489	if (ctx.prev.type != IOMAP_HOLE) {
				490	ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
				491	if (ret < 0)
				492	return ret;
				493	}
				494
				495	return 0;
				496	}
				497	EXPORT_SYMBOL_GPL(iomap_fiemap);