Blame - fs/xfs/linux-2.6/xfs_file.c - fp2-dev/kernel/msm

blob: fbbf657df0cd581f0979dba3744295ecfe087813 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	2	* Copyright (c) 2000-2005 Silicon Graphics, Inc.
				3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include "xfs.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	19	#include "xfs_fs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	20	#include "xfs_bit.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include "xfs_log.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	22	#include "xfs_inum.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	23	#include "xfs_sb.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	24	#include "xfs_ag.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	25	#include "xfs_trans.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	26	#include "xfs_mount.h"
				27	#include "xfs_bmap_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include "xfs_alloc.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	29	#include "xfs_dinode.h"
				30	#include "xfs_inode.h"
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	31	#include "xfs_inode_item.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	32	#include "xfs_bmap.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	33	#include "xfs_error.h"
Christoph Hellwig	739bfb2	2007-08-29 10:58:01 +1000	[diff] [blame]	34	#include "xfs_vnodeops.h"
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	35	#include "xfs_da_btree.h"
Christoph Hellwig	ddcd856	2008-12-03 07:55:34 -0500	[diff] [blame]	36	#include "xfs_ioctl.h"
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	37	#include "xfs_trace.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	38
				39	#include <linux/dcache.h>
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	40	#include <linux/falloc.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41
Alexey Dobriyan	f0f37e2	2009-09-27 22:29:37 +0400	[diff] [blame]	42	static const struct vm_operations_struct xfs_file_vm_ops;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	43
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	44	/*
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	45	* Locking primitives for read and write IO paths to ensure we consistently use
				46	* and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
				47	*/
				48	static inline void
				49	xfs_rw_ilock(
				50	struct xfs_inode *ip,
				51	int type)
				52	{
				53	if (type & XFS_IOLOCK_EXCL)
				54	mutex_lock(&VFS_I(ip)->i_mutex);
				55	xfs_ilock(ip, type);
				56	}
				57
				58	static inline void
				59	xfs_rw_iunlock(
				60	struct xfs_inode *ip,
				61	int type)
				62	{
				63	xfs_iunlock(ip, type);
				64	if (type & XFS_IOLOCK_EXCL)
				65	mutex_unlock(&VFS_I(ip)->i_mutex);
				66	}
				67
				68	static inline void
				69	xfs_rw_ilock_demote(
				70	struct xfs_inode *ip,
				71	int type)
				72	{
				73	xfs_ilock_demote(ip, type);
				74	if (type & XFS_IOLOCK_EXCL)
				75	mutex_unlock(&VFS_I(ip)->i_mutex);
				76	}
				77
				78	/*
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	79	* xfs_iozero
				80	*
				81	* xfs_iozero clears the specified range of buffer supplied,
				82	* and marks all the affected blocks as valid and modified. If
				83	* an affected block is not allocated, it will be allocated. If
				84	* an affected block is not completely overwritten, and is not
				85	* valid before the operation, it will be read from disk before
				86	* being partially zeroed.
				87	*/
				88	STATIC int
				89	xfs_iozero(
				90	struct xfs_inode ip, / inode */
				91	loff_t pos, /* offset in file */
				92	size_t count) /* size of data to zero */
				93	{
				94	struct page *page;
				95	struct address_space *mapping;
				96	int status;
				97
				98	mapping = VFS_I(ip)->i_mapping;
				99	do {
				100	unsigned offset, bytes;
				101	void *fsdata;
				102
				103	offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
				104	bytes = PAGE_CACHE_SIZE - offset;
				105	if (bytes > count)
				106	bytes = count;
				107
				108	status = pagecache_write_begin(NULL, mapping, pos, bytes,
				109	AOP_FLAG_UNINTERRUPTIBLE,
				110	&page, &fsdata);
				111	if (status)
				112	break;
				113
				114	zero_user(page, offset, bytes);
				115
				116	status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
				117	page, fsdata);
				118	WARN_ON(status <= 0); /* can't return less than zero! */
				119	pos += bytes;
				120	count -= bytes;
				121	status = 0;
				122	} while (count);
				123
				124	return (-status);
				125	}
				126
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	127	STATIC int
				128	xfs_file_fsync(
				129	struct file *file,
Josef Bacik	02c24a8	2011-07-16 20:44:56 -0400	[diff] [blame^]	130	loff_t start,
				131	loff_t end,
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	132	int datasync)
				133	{
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	134	struct inode *inode = file->f_mapping->host;
				135	struct xfs_inode *ip = XFS_I(inode);
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	136	struct xfs_mount *mp = ip->i_mount;
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	137	struct xfs_trans *tp;
				138	int error = 0;
				139	int log_flushed = 0;
				140
Christoph Hellwig	cca28fb	2010-06-24 11:57:09 +1000	[diff] [blame]	141	trace_xfs_file_fsync(ip);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	142
Josef Bacik	02c24a8	2011-07-16 20:44:56 -0400	[diff] [blame^]	143	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
				144	if (error)
				145	return error;
				146
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	147	if (XFS_FORCED_SHUTDOWN(mp))
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	148	return -XFS_ERROR(EIO);
				149
				150	xfs_iflags_clear(ip, XFS_ITRUNCATED);
				151
Christoph Hellwig	37bc574	2010-04-20 17:00:59 +1000	[diff] [blame]	152	xfs_ioend_wait(ip);
				153
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	154	if (mp->m_flags & XFS_MOUNT_BARRIER) {
				155	/*
				156	* If we have an RT and/or log subvolume we need to make sure
				157	* to flush the write cache the device used for file data
				158	* first. This is to ensure newly written file data make
				159	* it to disk before logging the new inode size in case of
				160	* an extending write.
				161	*/
				162	if (XFS_IS_REALTIME_INODE(ip))
				163	xfs_blkdev_issue_flush(mp->m_rtdev_targp);
				164	else if (mp->m_logdev_targp != mp->m_ddev_targp)
				165	xfs_blkdev_issue_flush(mp->m_ddev_targp);
				166	}
				167
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	168	/*
				169	* We always need to make sure that the required inode state is safe on
				170	* disk. The inode might be clean but we still might need to force the
				171	* log because of committed transactions that haven't hit the disk yet.
				172	* Likewise, there could be unflushed non-transactional changes to the
				173	* inode core that have to go to disk and this requires us to issue
				174	* a synchronous transaction to capture these changes correctly.
				175	*
				176	* This code relies on the assumption that if the i_update_core field
				177	* of the inode is clear and the inode is unpinned then it is clean
				178	* and no action is required.
				179	*/
				180	xfs_ilock(ip, XFS_ILOCK_SHARED);
				181
Christoph Hellwig	66d834e	2010-02-15 09:44:49 +0000	[diff] [blame]	182	/*
				183	* First check if the VFS inode is marked dirty. All the dirtying
				184	* of non-transactional updates no goes through mark_inode_dirty*,
				185	* which allows us to distinguish beteeen pure timestamp updates
				186	* and i_size updates which need to be caught for fdatasync.
				187	* After that also theck for the dirty state in the XFS inode, which
				188	* might gets cleared when the inode gets written out via the AIL
				189	* or xfs_iflush_cluster.
				190	*/
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	191	if (((inode->i_state & I_DIRTY_DATASYNC) \|\|
				192	((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
Christoph Hellwig	66d834e	2010-02-15 09:44:49 +0000	[diff] [blame]	193	ip->i_update_core) {
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	194	/*
				195	* Kick off a transaction to log the inode core to get the
				196	* updates. The sync transaction will also force the log.
				197	*/
				198	xfs_iunlock(ip, XFS_ILOCK_SHARED);
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	199	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	200	error = xfs_trans_reserve(tp, 0,
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	201	XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	202	if (error) {
				203	xfs_trans_cancel(tp, 0);
				204	return -error;
				205	}
				206	xfs_ilock(ip, XFS_ILOCK_EXCL);
				207
				208	/*
				209	* Note - it's possible that we might have pushed ourselves out
				210	* of the way during trans_reserve which would flush the inode.
				211	* But there's no guarantee that the inode buffer has actually
				212	* gone out yet (it's delwri). Plus the buffer could be pinned
				213	* anyway if it's part of an inode in another recent
				214	* transaction. So we play it safe and fire off the
				215	* transaction anyway.
				216	*/
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	217	xfs_trans_ijoin(tp, ip);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	218	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				219	xfs_trans_set_sync(tp);
				220	error = _xfs_trans_commit(tp, 0, &log_flushed);
				221
				222	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				223	} else {
				224	/*
				225	* Timestamps/size haven't changed since last inode flush or
				226	* inode transaction commit. That means either nothing got
				227	* written or a transaction committed which caught the updates.
				228	* If the latter happened and the transaction hasn't hit the
				229	* disk yet, the inode will be still be pinned. If it is,
				230	* force the log.
				231	*/
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	232	if (xfs_ipincount(ip)) {
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	233	error = _xfs_log_force_lsn(mp,
Christoph Hellwig	024910c	2010-02-17 19:34:57 +0000	[diff] [blame]	234	ip->i_itemp->ili_last_lsn,
				235	XFS_LOG_SYNC, &log_flushed);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	236	}
Christoph Hellwig	024910c	2010-02-17 19:34:57 +0000	[diff] [blame]	237	xfs_iunlock(ip, XFS_ILOCK_SHARED);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	238	}
				239
Christoph Hellwig	a27a263	2011-06-16 12:02:23 +0000	[diff] [blame]	240	/*
				241	* If we only have a single device, and the log force about was
				242	* a no-op we might have to flush the data device cache here.
				243	* This can only happen for fdatasync/O_DSYNC if we were overwriting
				244	* an already allocated file and thus do not have any metadata to
				245	* commit.
				246	*/
				247	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
				248	mp->m_logdev_targp == mp->m_ddev_targp &&
				249	!XFS_IS_REALTIME_INODE(ip) &&
				250	!log_flushed)
				251	xfs_blkdev_issue_flush(mp->m_ddev_targp);
Christoph Hellwig	fd3200b	2010-02-15 09:44:48 +0000	[diff] [blame]	252
				253	return -error;
				254	}
				255
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	256	STATIC ssize_t
				257	xfs_file_aio_read(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	258	struct kiocb *iocb,
				259	const struct iovec *iovp,
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	260	unsigned long nr_segs,
				261	loff_t pos)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	262	{
				263	struct file *file = iocb->ki_filp;
				264	struct inode *inode = file->f_mapping->host;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	265	struct xfs_inode *ip = XFS_I(inode);
				266	struct xfs_mount *mp = ip->i_mount;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	267	size_t size = 0;
				268	ssize_t ret = 0;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	269	int ioflags = 0;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	270	xfs_fsize_t n;
				271	unsigned long seg;
				272
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	273	XFS_STATS_INC(xs_read_calls);
				274
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	275	BUG_ON(iocb->ki_pos != pos);
				276
				277	if (unlikely(file->f_flags & O_DIRECT))
				278	ioflags \|= IO_ISDIRECT;
				279	if (file->f_mode & FMODE_NOCMTIME)
				280	ioflags \|= IO_INVIS;
				281
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	282	/* START copy & waste from filemap.c */
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	283	for (seg = 0; seg < nr_segs; seg++) {
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	284	const struct iovec *iv = &iovp[seg];
				285
				286	/*
				287	* If any segment has a negative length, or the cumulative
				288	* length ever wraps negative then return -EINVAL.
				289	*/
				290	size += iv->iov_len;
				291	if (unlikely((ssize_t)(size\|iv->iov_len) < 0))
				292	return XFS_ERROR(-EINVAL);
				293	}
				294	/* END copy & waste from filemap.c */
				295
				296	if (unlikely(ioflags & IO_ISDIRECT)) {
				297	xfs_buftarg_t *target =
				298	XFS_IS_REALTIME_INODE(ip) ?
				299	mp->m_rtdev_targp : mp->m_ddev_targp;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	300	if ((iocb->ki_pos & target->bt_smask) \|\|
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	301	(size & target->bt_smask)) {
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	302	if (iocb->ki_pos == ip->i_size)
				303	return 0;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	304	return -XFS_ERROR(EINVAL);
				305	}
				306	}
				307
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	308	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
				309	if (n <= 0 \|\| size == 0)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	310	return 0;
				311
				312	if (n < size)
				313	size = n;
				314
				315	if (XFS_FORCED_SHUTDOWN(mp))
				316	return -EIO;
				317
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	318	if (unlikely(ioflags & IO_ISDIRECT)) {
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	319	xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
				320
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	321	if (inode->i_mapping->nrpages) {
				322	ret = -xfs_flushinval_pages(ip,
				323	(iocb->ki_pos & PAGE_CACHE_MASK),
				324	-1, FI_REMAPF_LOCKED);
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	325	if (ret) {
				326	xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				327	return ret;
				328	}
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	329	}
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	330	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
				331	} else
				332	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	333
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	334	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	335
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	336	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	337	if (ret > 0)
				338	XFS_STATS_ADD(xs_read_bytes, ret);
				339
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	340	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	341	return ret;
				342	}
				343
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	344	STATIC ssize_t
				345	xfs_file_splice_read(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	346	struct file *infilp,
				347	loff_t *ppos,
				348	struct pipe_inode_info *pipe,
				349	size_t count,
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	350	unsigned int flags)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	351	{
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	352	struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	353	int ioflags = 0;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	354	ssize_t ret;
				355
				356	XFS_STATS_INC(xs_read_calls);
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	357
				358	if (infilp->f_mode & FMODE_NOCMTIME)
				359	ioflags \|= IO_INVIS;
				360
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	361	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				362	return -EIO;
				363
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	364	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	365
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	366	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
				367
				368	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
				369	if (ret > 0)
				370	XFS_STATS_ADD(xs_read_bytes, ret);
				371
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	372	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	373	return ret;
				374	}
				375
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	376	STATIC void
				377	xfs_aio_write_isize_update(
				378	struct inode *inode,
				379	loff_t *ppos,
				380	ssize_t bytes_written)
				381	{
				382	struct xfs_inode *ip = XFS_I(inode);
				383	xfs_fsize_t isize = i_size_read(inode);
				384
				385	if (bytes_written > 0)
				386	XFS_STATS_ADD(xs_write_bytes, bytes_written);
				387
				388	if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
				389	*ppos > isize))
				390	*ppos = isize;
				391
				392	if (*ppos > ip->i_size) {
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	393	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	394	if (*ppos > ip->i_size)
				395	ip->i_size = *ppos;
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	396	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	397	}
				398	}
				399
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	400	/*
				401	* If this was a direct or synchronous I/O that failed (such as ENOSPC) then
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	402	* part of the I/O may have been written to disk before the error occurred. In
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	403	* this case the on-disk file size may have been adjusted beyond the in-memory
				404	* file size and now needs to be truncated back.
				405	*/
				406	STATIC void
				407	xfs_aio_write_newsize_update(
				408	struct xfs_inode *ip)
				409	{
				410	if (ip->i_new_size) {
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	411	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	412	ip->i_new_size = 0;
				413	if (ip->i_d.di_size > ip->i_size)
				414	ip->i_d.di_size = ip->i_size;
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	415	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	416	}
				417	}
				418
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	419	/*
				420	* xfs_file_splice_write() does not use xfs_rw_ilock() because
				421	* generic_file_splice_write() takes the i_mutex itself. This, in theory,
				422	* could cause lock inversions between the aio_write path and the splice path
				423	* if someone is doing concurrent splice(2) based writes and write(2) based
				424	* writes to the same inode. The only real way to fix this is to re-implement
				425	* the generic code here with correct locking orders.
				426	*/
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	427	STATIC ssize_t
				428	xfs_file_splice_write(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	429	struct pipe_inode_info *pipe,
				430	struct file *outfilp,
				431	loff_t *ppos,
				432	size_t count,
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	433	unsigned int flags)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	434	{
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	435	struct inode *inode = outfilp->f_mapping->host;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	436	struct xfs_inode *ip = XFS_I(inode);
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	437	xfs_fsize_t new_size;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	438	int ioflags = 0;
				439	ssize_t ret;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	440
				441	XFS_STATS_INC(xs_write_calls);
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	442
				443	if (outfilp->f_mode & FMODE_NOCMTIME)
				444	ioflags \|= IO_INVIS;
				445
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	446	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
				447	return -EIO;
				448
				449	xfs_ilock(ip, XFS_IOLOCK_EXCL);
				450
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	451	new_size = *ppos + count;
				452
				453	xfs_ilock(ip, XFS_ILOCK_EXCL);
				454	if (new_size > ip->i_size)
				455	ip->i_new_size = new_size;
				456	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				457
				458	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
				459
				460	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	461
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	462	xfs_aio_write_isize_update(inode, ppos, ret);
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	463	xfs_aio_write_newsize_update(ip);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	464	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				465	return ret;
				466	}
				467
				468	/*
				469	* This routine is called to handle zeroing any space in the last
				470	* block of the file that is beyond the EOF. We do this since the
				471	* size is being increased without writing anything to that block
				472	* and we don't want anyone to read the garbage on the disk.
				473	*/
				474	STATIC int /* error (positive) */
				475	xfs_zero_last_block(
				476	xfs_inode_t *ip,
				477	xfs_fsize_t offset,
				478	xfs_fsize_t isize)
				479	{
				480	xfs_fileoff_t last_fsb;
				481	xfs_mount_t *mp = ip->i_mount;
				482	int nimaps;
				483	int zero_offset;
				484	int zero_len;
				485	int error = 0;
				486	xfs_bmbt_irec_t imap;
				487
				488	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				489
				490	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
				491	if (zero_offset == 0) {
				492	/*
				493	* There are no extra bytes in the last block on disk to
				494	* zero, so return.
				495	*/
				496	return 0;
				497	}
				498
				499	last_fsb = XFS_B_TO_FSBT(mp, isize);
				500	nimaps = 1;
				501	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
Christoph Hellwig	b4e9181	2010-06-23 18:11:15 +1000	[diff] [blame]	502	&nimaps, NULL);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	503	if (error) {
				504	return error;
				505	}
				506	ASSERT(nimaps > 0);
				507	/*
				508	* If the block underlying isize is just a hole, then there
				509	* is nothing to zero.
				510	*/
				511	if (imap.br_startblock == HOLESTARTBLOCK) {
				512	return 0;
				513	}
				514	/*
				515	* Zero the part of the last block beyond the EOF, and write it
				516	* out sync. We need to drop the ilock while we do this so we
				517	* don't deadlock when the buffer cache calls back to us.
				518	*/
				519	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				520
				521	zero_len = mp->m_sb.sb_blocksize - zero_offset;
				522	if (isize + zero_len > offset)
				523	zero_len = offset - isize;
				524	error = xfs_iozero(ip, isize, zero_len);
				525
				526	xfs_ilock(ip, XFS_ILOCK_EXCL);
				527	ASSERT(error >= 0);
				528	return error;
				529	}
				530
				531	/*
				532	* Zero any on disk space between the current EOF and the new,
				533	* larger EOF. This handles the normal case of zeroing the remainder
				534	* of the last block in the file and the unusual case of zeroing blocks
				535	* out beyond the size of the file. This second case only happens
				536	* with fixed size extents and when the system crashes before the inode
				537	* size was updated but after blocks were allocated. If fill is set,
				538	* then any holes in the range are filled and zeroed. If not, the holes
				539	* are left alone as holes.
				540	*/
				541
				542	int /* error (positive) */
				543	xfs_zero_eof(
				544	xfs_inode_t *ip,
				545	xfs_off_t offset, /* starting I/O offset */
				546	xfs_fsize_t isize) /* current inode size */
				547	{
				548	xfs_mount_t *mp = ip->i_mount;
				549	xfs_fileoff_t start_zero_fsb;
				550	xfs_fileoff_t end_zero_fsb;
				551	xfs_fileoff_t zero_count_fsb;
				552	xfs_fileoff_t last_fsb;
				553	xfs_fileoff_t zero_off;
				554	xfs_fsize_t zero_len;
				555	int nimaps;
				556	int error = 0;
				557	xfs_bmbt_irec_t imap;
				558
				559	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
				560	ASSERT(offset > isize);
				561
				562	/*
				563	* First handle zeroing the block on which isize resides.
				564	* We only zero a part of that block so it is handled specially.
				565	*/
				566	error = xfs_zero_last_block(ip, offset, isize);
				567	if (error) {
				568	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
				569	return error;
				570	}
				571
				572	/*
				573	* Calculate the range between the new size and the old
				574	* where blocks needing to be zeroed may exist. To get the
				575	* block where the last byte in the file currently resides,
				576	* we need to subtract one from the size and truncate back
				577	* to a block boundary. We subtract 1 in case the size is
				578	* exactly on a block boundary.
				579	*/
				580	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
				581	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
				582	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
				583	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
				584	if (last_fsb == end_zero_fsb) {
				585	/*
				586	* The size was only incremented on its last block.
				587	* We took care of that above, so just return.
				588	*/
				589	return 0;
				590	}
				591
				592	ASSERT(start_zero_fsb <= end_zero_fsb);
				593	while (start_zero_fsb <= end_zero_fsb) {
				594	nimaps = 1;
				595	zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
				596	error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
Christoph Hellwig	b4e9181	2010-06-23 18:11:15 +1000	[diff] [blame]	597	0, NULL, 0, &imap, &nimaps, NULL);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	598	if (error) {
				599	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_IOLOCK_EXCL));
				600	return error;
				601	}
				602	ASSERT(nimaps > 0);
				603
				604	if (imap.br_state == XFS_EXT_UNWRITTEN \|\|
				605	imap.br_startblock == HOLESTARTBLOCK) {
				606	/*
				607	* This loop handles initializing pages that were
				608	* partially initialized by the code below this
				609	* loop. It basically zeroes the part of the page
				610	* that sits on a hole and sets the page as P_HOLE
				611	* and calls remapf if it is a mapped file.
				612	*/
				613	start_zero_fsb = imap.br_startoff + imap.br_blockcount;
				614	ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
				615	continue;
				616	}
				617
				618	/*
				619	* There are blocks we need to zero.
				620	* Drop the inode lock while we're doing the I/O.
				621	* We'll still have the iolock to protect us.
				622	*/
				623	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				624
				625	zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
				626	zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
				627
				628	if ((zero_off + zero_len) > offset)
				629	zero_len = offset - zero_off;
				630
				631	error = xfs_iozero(ip, zero_off, zero_len);
				632	if (error) {
				633	goto out_lock;
				634	}
				635
				636	start_zero_fsb = imap.br_startoff + imap.br_blockcount;
				637	ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
				638
				639	xfs_ilock(ip, XFS_ILOCK_EXCL);
				640	}
				641
				642	return 0;
				643
				644	out_lock:
				645	xfs_ilock(ip, XFS_ILOCK_EXCL);
				646	ASSERT(error >= 0);
				647	return error;
				648	}
				649
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	650	/*
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	651	* Common pre-write limit and setup checks.
				652	*
				653	* Returns with iolock held according to @iolock.
				654	*/
				655	STATIC ssize_t
				656	xfs_file_aio_write_checks(
				657	struct file *file,
				658	loff_t *pos,
				659	size_t *count,
				660	int *iolock)
				661	{
				662	struct inode *inode = file->f_mapping->host;
				663	struct xfs_inode *ip = XFS_I(inode);
				664	xfs_fsize_t new_size;
				665	int error = 0;
				666
				667	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
				668	if (error) {
				669	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL \| *iolock);
				670	*iolock = 0;
				671	return error;
				672	}
				673
				674	new_size = pos + count;
				675	if (new_size > ip->i_size)
				676	ip->i_new_size = new_size;
				677
				678	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
				679	file_update_time(file);
				680
				681	/*
				682	* If the offset is beyond the size of the file, we need to zero any
				683	* blocks that fall between the existing EOF and the start of this
				684	* write.
				685	*/
				686	if (*pos > ip->i_size)
				687	error = -xfs_zero_eof(ip, *pos, ip->i_size);
				688
				689	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
				690	if (error)
				691	return error;
				692
				693	/*
				694	* If we're writing the file then make sure to clear the setuid and
				695	* setgid bits if the process is not being run by root. This keeps
				696	* people from modifying setuid and setgid binaries.
				697	*/
				698	return file_remove_suid(file);
				699
				700	}
				701
				702	/*
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	703	* xfs_file_dio_aio_write - handle direct IO writes
				704	*
				705	* Lock the inode appropriately to prepare for and issue a direct IO write.
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	706	* By separating it from the buffered write path we remove all the tricky to
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	707	* follow locking changes and looping.
				708	*
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	709	* If there are cached pages or we're extending the file, we need IOLOCK_EXCL
				710	* until we're sure the bytes at the new EOF have been zeroed and/or the cached
				711	* pages are flushed out.
				712	*
				713	* In most cases the direct IO writes will be done holding IOLOCK_SHARED
				714	* allowing them to be done in parallel with reads and other direct IO writes.
				715	* However, if the IO is not aligned to filesystem blocks, the direct IO layer
				716	* needs to do sub-block zeroing and that requires serialisation against other
				717	* direct IOs to the same block. In this case we need to serialise the
				718	* submission of the unaligned IOs so that we don't get racing block zeroing in
				719	* the dio layer. To avoid the problem with aio, we also need to wait for
				720	* outstanding IOs to complete so that unwritten extent conversion is completed
				721	* before we try to map the overlapping block. This is currently implemented by
				722	* hitting it with a big hammer (i.e. xfs_ioend_wait()).
				723	*
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	724	* Returns with locks held indicated by @iolock and errors indicated by
				725	* negative return values.
				726	*/
				727	STATIC ssize_t
				728	xfs_file_dio_aio_write(
				729	struct kiocb *iocb,
				730	const struct iovec *iovp,
				731	unsigned long nr_segs,
				732	loff_t pos,
				733	size_t ocount,
				734	int *iolock)
				735	{
				736	struct file *file = iocb->ki_filp;
				737	struct address_space *mapping = file->f_mapping;
				738	struct inode *inode = mapping->host;
				739	struct xfs_inode *ip = XFS_I(inode);
				740	struct xfs_mount *mp = ip->i_mount;
				741	ssize_t ret = 0;
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	742	size_t count = ocount;
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	743	int unaligned_io = 0;
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	744	struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
				745	mp->m_rtdev_targp : mp->m_ddev_targp;
				746
				747	*iolock = 0;
				748	if ((pos & target->bt_smask) \|\| (count & target->bt_smask))
				749	return -XFS_ERROR(EINVAL);
				750
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	751	if ((pos & mp->m_blockmask) \|\| ((pos + count) & mp->m_blockmask))
				752	unaligned_io = 1;
				753
				754	if (unaligned_io \|\| mapping->nrpages \|\| pos > ip->i_size)
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	755	*iolock = XFS_IOLOCK_EXCL;
				756	else
				757	*iolock = XFS_IOLOCK_SHARED;
				758	xfs_rw_ilock(ip, XFS_ILOCK_EXCL \| *iolock);
				759
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	760	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
				761	if (ret)
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	762	return ret;
				763
				764	if (mapping->nrpages) {
				765	WARN_ON(*iolock != XFS_IOLOCK_EXCL);
				766	ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
				767	FI_REMAPF_LOCKED);
				768	if (ret)
				769	return ret;
				770	}
				771
Dave Chinner	eda7798	2011-01-11 10:22:40 +1100	[diff] [blame]	772	/*
				773	* If we are doing unaligned IO, wait for all other IO to drain,
				774	* otherwise demote the lock if we had to flush cached pages
				775	*/
				776	if (unaligned_io)
				777	xfs_ioend_wait(ip);
				778	else if (*iolock == XFS_IOLOCK_EXCL) {
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	779	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
				780	*iolock = XFS_IOLOCK_SHARED;
				781	}
				782
				783	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
				784	ret = generic_file_direct_write(iocb, iovp,
				785	&nr_segs, pos, &iocb->ki_pos, count, ocount);
				786
				787	/* No fallback to buffered IO on errors for XFS. */
				788	ASSERT(ret < 0 \|\| ret == count);
				789	return ret;
				790	}
				791
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	792	STATIC ssize_t
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	793	xfs_file_buffered_aio_write(
				794	struct kiocb *iocb,
				795	const struct iovec *iovp,
				796	unsigned long nr_segs,
				797	loff_t pos,
				798	size_t ocount,
				799	int *iolock)
				800	{
				801	struct file *file = iocb->ki_filp;
				802	struct address_space *mapping = file->f_mapping;
				803	struct inode *inode = mapping->host;
				804	struct xfs_inode *ip = XFS_I(inode);
				805	ssize_t ret;
				806	int enospc = 0;
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	807	size_t count = ocount;
				808
				809	*iolock = XFS_IOLOCK_EXCL;
				810	xfs_rw_ilock(ip, XFS_ILOCK_EXCL \| *iolock);
				811
Dave Chinner	4d8d158	2011-01-11 10:23:42 +1100	[diff] [blame]	812	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
				813	if (ret)
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	814	return ret;
				815
				816	/* We can write back this queue in page reclaim */
				817	current->backing_dev_info = mapping->backing_dev_info;
				818
				819	write_retry:
				820	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
				821	ret = generic_file_buffered_write(iocb, iovp, nr_segs,
				822	pos, &iocb->ki_pos, count, ret);
				823	/*
				824	* if we just got an ENOSPC, flush the inode now we aren't holding any
				825	* page locks and retry once
				826	*/
				827	if (ret == -ENOSPC && !enospc) {
				828	ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
				829	if (ret)
				830	return ret;
				831	enospc = 1;
				832	goto write_retry;
				833	}
				834	current->backing_dev_info = NULL;
				835	return ret;
				836	}
				837
				838	STATIC ssize_t
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	839	xfs_file_aio_write(
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	840	struct kiocb *iocb,
				841	const struct iovec *iovp,
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	842	unsigned long nr_segs,
				843	loff_t pos)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	844	{
				845	struct file *file = iocb->ki_filp;
				846	struct address_space *mapping = file->f_mapping;
				847	struct inode *inode = mapping->host;
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	848	struct xfs_inode *ip = XFS_I(inode);
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	849	ssize_t ret;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	850	int iolock;
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	851	size_t ocount = 0;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	852
				853	XFS_STATS_INC(xs_write_calls);
				854
Christoph Hellwig	00258e3	2010-02-15 09:44:47 +0000	[diff] [blame]	855	BUG_ON(iocb->ki_pos != pos);
				856
Dave Chinner	a363f0c	2011-01-11 10:13:53 +1100	[diff] [blame]	857	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
				858	if (ret)
				859	return ret;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	860
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	861	if (ocount == 0)
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	862	return 0;
				863
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	864	xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	865
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	866	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	867	return -EIO;
				868
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	869	if (unlikely(file->f_flags & O_DIRECT))
Dave Chinner	f0d26e8	2011-01-11 10:15:36 +1100	[diff] [blame]	870	ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
				871	ocount, &iolock);
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	872	else
				873	ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
				874	ocount, &iolock);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	875
Dave Chinner	edafb6d	2011-01-11 10:14:06 +1100	[diff] [blame]	876	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	877
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	878	if (ret <= 0)
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	879	goto out_unlock;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	880
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	881	/* Handle various SYNC-type writes */
				882	if ((file->f_flags & O_DSYNC) \|\| IS_SYNC(inode)) {
				883	loff_t end = pos + ret - 1;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	884
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	885	xfs_rw_iunlock(ip, iolock);
Josef Bacik	02c24a8	2011-07-16 20:44:56 -0400	[diff] [blame^]	886	ret = -xfs_file_fsync(file, pos, end,
				887	(file->f_flags & __O_SYNC) ? 0 : 1);
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	888	xfs_rw_ilock(ip, iolock);
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	889	}
				890
Dave Chinner	637bbc7	2011-01-11 10:17:30 +1100	[diff] [blame]	891	out_unlock:
Dave Chinner	4c5cfd1	2011-01-11 10:14:16 +1100	[diff] [blame]	892	xfs_aio_write_newsize_update(ip);
Dave Chinner	487f84f	2011-01-12 11:37:10 +1100	[diff] [blame]	893	xfs_rw_iunlock(ip, iolock);
Dave Chinner	a363f0c	2011-01-11 10:13:53 +1100	[diff] [blame]	894	return ret;
Christoph Hellwig	dda35b8	2010-02-15 09:44:46 +0000	[diff] [blame]	895	}
				896
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	897	STATIC long
				898	xfs_file_fallocate(
				899	struct file *file,
				900	int mode,
				901	loff_t offset,
				902	loff_t len)
				903	{
				904	struct inode *inode = file->f_path.dentry->d_inode;
				905	long error;
				906	loff_t new_size = 0;
				907	xfs_flock64_t bf;
				908	xfs_inode_t *ip = XFS_I(inode);
				909	int cmd = XFS_IOC_RESVSP;
Dave Chinner	8287889	2011-03-26 09:13:08 +1100	[diff] [blame]	910	int attr_flags = XFS_ATTR_NOLOCK;
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	911
				912	if (mode & ~(FALLOC_FL_KEEP_SIZE \| FALLOC_FL_PUNCH_HOLE))
				913	return -EOPNOTSUPP;
				914
				915	bf.l_whence = 0;
				916	bf.l_start = offset;
				917	bf.l_len = len;
				918
				919	xfs_ilock(ip, XFS_IOLOCK_EXCL);
				920
				921	if (mode & FALLOC_FL_PUNCH_HOLE)
				922	cmd = XFS_IOC_UNRESVSP;
				923
				924	/* check the new inode size is valid before allocating */
				925	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
				926	offset + len > i_size_read(inode)) {
				927	new_size = offset + len;
				928	error = inode_newsize_ok(inode, new_size);
				929	if (error)
				930	goto out_unlock;
				931	}
				932
Dave Chinner	8287889	2011-03-26 09:13:08 +1100	[diff] [blame]	933	if (file->f_flags & O_DSYNC)
				934	attr_flags \|= XFS_ATTR_SYNC;
				935
				936	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	937	if (error)
				938	goto out_unlock;
				939
				940	/* Change file size if needed */
				941	if (new_size) {
				942	struct iattr iattr;
				943
				944	iattr.ia_valid = ATTR_SIZE;
				945	iattr.ia_size = new_size;
				946	error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
				947	}
				948
				949	out_unlock:
				950	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
				951	return error;
				952	}
				953
				954
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	955	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	956	xfs_file_open(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	957	struct inode *inode,
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	958	struct file *file)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	959	{
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	960	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	961	return -EFBIG;
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	962	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
				963	return -EIO;
				964	return 0;
				965	}
				966
				967	STATIC int
				968	xfs_dir_open(
				969	struct inode *inode,
				970	struct file *file)
				971	{
				972	struct xfs_inode *ip = XFS_I(inode);
				973	int mode;
				974	int error;
				975
				976	error = xfs_file_open(inode, file);
				977	if (error)
				978	return error;
				979
				980	/*
				981	* If there are any blocks, read-ahead block 0 as we're almost
				982	* certain to have the next operation be a read there.
				983	*/
				984	mode = xfs_ilock_map_shared(ip);
				985	if (ip->i_d.di_nextents > 0)
				986	xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
				987	xfs_iunlock(ip, mode);
				988	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	989	}
				990
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	991	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	992	xfs_file_release(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	993	struct inode *inode,
				994	struct file *filp)
				995	{
Christoph Hellwig	739bfb2	2007-08-29 10:58:01 +1000	[diff] [blame]	996	return -xfs_release(XFS_I(inode));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	997	}
				998
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	999	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1000	xfs_file_readdir(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1001	struct file *filp,
				1002	void *dirent,
				1003	filldir_t filldir)
				1004	{
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1005	struct inode *inode = filp->f_path.dentry->d_inode;
Christoph Hellwig	739bfb2	2007-08-29 10:58:01 +1000	[diff] [blame]	1006	xfs_inode_t *ip = XFS_I(inode);
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1007	int error;
				1008	size_t bufsize;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1009
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1010	/*
				1011	* The Linux API doesn't pass down the total size of the buffer
				1012	* we read into down to the filesystem. With the filldir concept
				1013	* it's not needed for correct information, but the XFS dir2 leaf
				1014	* code wants an estimate of the buffer size to calculate it's
				1015	* readahead window and size the buffers used for mapping to
				1016	* physical blocks.
				1017	*
				1018	* Try to give it an estimate that's good enough, maybe at some
				1019	* point we can change the ->readdir prototype to include the
Eric Sandeen	a9cc799	2010-02-03 17:50:13 +0000	[diff] [blame]	1020	* buffer size. For now we use the current glibc buffer size.
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1021	*/
Eric Sandeen	a9cc799	2010-02-03 17:50:13 +0000	[diff] [blame]	1022	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1023
Christoph Hellwig	739bfb2	2007-08-29 10:58:01 +1000	[diff] [blame]	1024	error = xfs_readdir(ip, dirent, bufsize,
Christoph Hellwig	051e7cd	2007-08-28 13:58:24 +1000	[diff] [blame]	1025	(xfs_off_t *)&filp->f_pos, filldir);
				1026	if (error)
				1027	return -error;
				1028	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1029	}
				1030
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1031	STATIC int
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1032	xfs_file_mmap(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1033	struct file *filp,
				1034	struct vm_area_struct *vma)
				1035	{
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1036	vma->vm_ops = &xfs_file_vm_ops;
Nick Piggin	d0217ac	2007-07-19 01:47:03 -0700	[diff] [blame]	1037	vma->vm_flags \|= VM_CAN_NONLINEAR;
Dean Roehrich	6fac0cb	2005-06-21 14:07:45 +1000	[diff] [blame]	1038
Nathan Scott	fbc1462	2006-06-09 14:52:13 +1000	[diff] [blame]	1039	file_accessed(filp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1040	return 0;
				1041	}
				1042
David Chinner	4f57dbc	2007-07-19 16:28:17 +1000	[diff] [blame]	1043	/*
				1044	* mmap()d file has taken write protection fault and is being made
				1045	* writable. We can set the page state up correctly for a writable
				1046	* page, which means we can do correct delalloc accounting (ENOSPC
				1047	* checking!) and unwritten extent mapping.
				1048	*/
				1049	STATIC int
				1050	xfs_vm_page_mkwrite(
				1051	struct vm_area_struct *vma,
Nick Piggin	c2ec175	2009-03-31 15:23:21 -0700	[diff] [blame]	1052	struct vm_fault *vmf)
David Chinner	4f57dbc	2007-07-19 16:28:17 +1000	[diff] [blame]	1053	{
Nick Piggin	c2ec175	2009-03-31 15:23:21 -0700	[diff] [blame]	1054	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
David Chinner	4f57dbc	2007-07-19 16:28:17 +1000	[diff] [blame]	1055	}
				1056
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	1057	const struct file_operations xfs_file_operations = {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1058	.llseek = generic_file_llseek,
				1059	.read = do_sync_read,
Dean Roehrich	bb3f724	2005-09-02 15:43:05 +1000	[diff] [blame]	1060	.write = do_sync_write,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1061	.aio_read = xfs_file_aio_read,
				1062	.aio_write = xfs_file_aio_write,
Nathan Scott	1b89584	2006-03-31 13:08:59 +1000	[diff] [blame]	1063	.splice_read = xfs_file_splice_read,
				1064	.splice_write = xfs_file_splice_write,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1065	.unlocked_ioctl = xfs_file_ioctl,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1066	#ifdef CONFIG_COMPAT
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1067	.compat_ioctl = xfs_file_compat_ioctl,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1068	#endif
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1069	.mmap = xfs_file_mmap,
				1070	.open = xfs_file_open,
				1071	.release = xfs_file_release,
				1072	.fsync = xfs_file_fsync,
Christoph Hellwig	2fe17c1	2011-01-14 13:07:43 +0100	[diff] [blame]	1073	.fallocate = xfs_file_fallocate,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1074	};
				1075
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	1076	const struct file_operations xfs_dir_file_operations = {
Christoph Hellwig	f999a5b	2008-11-28 14:23:32 +1100	[diff] [blame]	1077	.open = xfs_dir_open,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1078	.read = generic_read_dir,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1079	.readdir = xfs_file_readdir,
Al Viro	59af158	2008-08-24 07:24:41 -0400	[diff] [blame]	1080	.llseek = generic_file_llseek,
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1081	.unlocked_ioctl = xfs_file_ioctl,
Nathan Scott	d387039	2005-05-06 06:44:46 -0700	[diff] [blame]	1082	#ifdef CONFIG_COMPAT
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1083	.compat_ioctl = xfs_file_compat_ioctl,
Nathan Scott	d387039	2005-05-06 06:44:46 -0700	[diff] [blame]	1084	#endif
Nathan Scott	3562fd4	2006-03-14 14:00:35 +1100	[diff] [blame]	1085	.fsync = xfs_file_fsync,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1086	};
				1087
Alexey Dobriyan	f0f37e2	2009-09-27 22:29:37 +0400	[diff] [blame]	1088	static const struct vm_operations_struct xfs_file_vm_ops = {
Nick Piggin	54cb882	2007-07-19 01:46:59 -0700	[diff] [blame]	1089	.fault = filemap_fault,
David Chinner	4f57dbc	2007-07-19 16:28:17 +1000	[diff] [blame]	1090	.page_mkwrite = xfs_vm_page_mkwrite,
Dean Roehrich	6fac0cb	2005-06-21 14:07:45 +1000	[diff] [blame]	1091	};