Blame - fs/xfs/xfs_inode.c - kernel/msm-4.9

blob: 9916aef6099770b7faed0b1296a64156d61f9e87 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	2	* Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	3	* All Rights Reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	7	* published by the Free Software Foundation.
				8	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	*
Nathan Scott	7b71876	2005-11-02 14:58:39 +1100	[diff] [blame]	14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	*/
Robert P. J. Day	40ebd81	2007-11-23 16:30:51 +1100	[diff] [blame]	18	#include <linux/log2.h>
				19
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include "xfs.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	21	#include "xfs_fs.h"
Dave Chinner	70a9883c	2013-10-23 10:36:05 +1100	[diff] [blame]	22	#include "xfs_shared.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	23	#include "xfs_format.h"
				24	#include "xfs_log_format.h"
				25	#include "xfs_trans_resv.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	26	#include "xfs_sb.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27	#include "xfs_mount.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	28	#include "xfs_inode.h"
Dave Chinner	5706278	2013-10-15 09:17:51 +1100	[diff] [blame]	29	#include "xfs_da_format.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	30	#include "xfs_da_btree.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	31	#include "xfs_dir2.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	32	#include "xfs_attr_sf.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	33	#include "xfs_attr.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	34	#include "xfs_trans_space.h"
				35	#include "xfs_trans.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	36	#include "xfs_buf_item.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	37	#include "xfs_inode_item.h"
Nathan Scott	a844f45	2005-11-02 14:38:42 +1100	[diff] [blame]	38	#include "xfs_ialloc.h"
				39	#include "xfs_bmap.h"
Dave Chinner	6898811	2013-08-12 20:49:42 +1000	[diff] [blame]	40	#include "xfs_bmap_util.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include "xfs_error.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	42	#include "xfs_quota.h"
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	43	#include "xfs_filestream.h"
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	44	#include "xfs_cksum.h"
Christoph Hellwig	0b1b213	2009-12-14 23:14:59 +0000	[diff] [blame]	45	#include "xfs_trace.h"
Dave Chinner	33479e0	2012-10-08 21:56:11 +1100	[diff] [blame]	46	#include "xfs_icache.h"
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	47	#include "xfs_symlink.h"
Dave Chinner	239880e	2013-10-23 10:50:10 +1100	[diff] [blame]	48	#include "xfs_trans_priv.h"
				49	#include "xfs_log.h"
Dave Chinner	a4fbe6a	2013-10-23 10:51:50 +1100	[diff] [blame]	50	#include "xfs_bmap_btree.h"
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	51
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	kmem_zone_t *xfs_inode_zone;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	53
				54	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	55	* Used in xfs_itruncate_extents(). This is the maximum number of extents
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	56	* freed from a file in a single transaction.
				57	*/
				58	#define XFS_ITRUNC_MAX_EXTENTS 2
				59
				60	STATIC int xfs_iflush_int(xfs_inode_t , xfs_buf_t );
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	61
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	62	STATIC int xfs_iunlink_remove(xfs_trans_t , xfs_inode_t );
				63
Dave Chinner	2a0ec1d	2012-04-23 15:59:02 +1000	[diff] [blame]	64	/*
				65	* helper function to extract extent size hint from inode
				66	*/
				67	xfs_extlen_t
				68	xfs_get_extsz_hint(
				69	struct xfs_inode *ip)
				70	{
				71	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
				72	return ip->i_d.di_extsize;
				73	if (XFS_IS_REALTIME_INODE(ip))
				74	return ip->i_mount->m_sb.sb_rextsize;
				75	return 0;
				76	}
				77
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	78	/*
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	79	* These two are wrapper routines around the xfs_ilock() routine used to
				80	* centralize some grungy code. They are used in places that wish to lock the
				81	* inode solely for reading the extents. The reason these places can't just
				82	* call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
				83	* bringing in of the extents from disk for a file in b-tree format. If the
				84	* inode is in b-tree format, then we need to lock the inode exclusively until
				85	* the extents are read in. Locking it exclusively all the time would limit
				86	* our parallelism unnecessarily, though. What we do instead is check to see
				87	* if the extents have been read in yet, and only lock the inode exclusively
				88	* if they have not.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	89	*
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	90	* The functions return a value which should be given to the corresponding
Christoph Hellwig	01f4f32	2013-12-06 12:30:08 -0800	[diff] [blame]	91	* xfs_iunlock() call.
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	92	*/
				93	uint
Christoph Hellwig	309ecac	2013-12-06 12:30:09 -0800	[diff] [blame]	94	xfs_ilock_data_map_shared(
				95	struct xfs_inode *ip)
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	96	{
Christoph Hellwig	309ecac	2013-12-06 12:30:09 -0800	[diff] [blame]	97	uint lock_mode = XFS_ILOCK_SHARED;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	98
Christoph Hellwig	309ecac	2013-12-06 12:30:09 -0800	[diff] [blame]	99	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
				100	(ip->i_df.if_flags & XFS_IFEXTENTS) == 0)
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	101	lock_mode = XFS_ILOCK_EXCL;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	102	xfs_ilock(ip, lock_mode);
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	103	return lock_mode;
				104	}
				105
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	106	uint
				107	xfs_ilock_attr_map_shared(
				108	struct xfs_inode *ip)
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	109	{
Christoph Hellwig	efa70be	2013-12-18 02:14:39 -0800	[diff] [blame]	110	uint lock_mode = XFS_ILOCK_SHARED;
				111
				112	if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE &&
				113	(ip->i_afp->if_flags & XFS_IFEXTENTS) == 0)
				114	lock_mode = XFS_ILOCK_EXCL;
				115	xfs_ilock(ip, lock_mode);
				116	return lock_mode;
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	117	}
				118
				119	/*
				120	* The xfs inode contains 2 locks: a multi-reader lock called the
				121	* i_iolock and a multi-reader lock called the i_lock. This routine
				122	* allows either or both of the locks to be obtained.
				123	*
				124	* The 2 locks should always be ordered so that the IO lock is
				125	* obtained first in order to prevent deadlock.
				126	*
				127	* ip -- the inode being locked
				128	* lock_flags -- this parameter indicates the inode's locks
				129	* to be locked. It can be:
				130	* XFS_IOLOCK_SHARED,
				131	* XFS_IOLOCK_EXCL,
				132	* XFS_ILOCK_SHARED,
				133	* XFS_ILOCK_EXCL,
				134	* XFS_IOLOCK_SHARED \| XFS_ILOCK_SHARED,
				135	* XFS_IOLOCK_SHARED \| XFS_ILOCK_EXCL,
				136	* XFS_IOLOCK_EXCL \| XFS_ILOCK_SHARED,
				137	* XFS_IOLOCK_EXCL \| XFS_ILOCK_EXCL
				138	*/
				139	void
				140	xfs_ilock(
				141	xfs_inode_t *ip,
				142	uint lock_flags)
				143	{
				144	trace_xfs_ilock(ip, lock_flags, _RET_IP_);
				145
				146	/*
				147	* You can't set both SHARED and EXCL for the same lock,
				148	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				149	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				150	*/
				151	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				152	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				153	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				154	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				155	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				156
				157	if (lock_flags & XFS_IOLOCK_EXCL)
				158	mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
				159	else if (lock_flags & XFS_IOLOCK_SHARED)
				160	mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
				161
				162	if (lock_flags & XFS_ILOCK_EXCL)
				163	mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				164	else if (lock_flags & XFS_ILOCK_SHARED)
				165	mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
				166	}
				167
				168	/*
				169	* This is just like xfs_ilock(), except that the caller
				170	* is guaranteed not to sleep. It returns 1 if it gets
				171	* the requested locks and 0 otherwise. If the IO lock is
				172	* obtained but the inode lock cannot be, then the IO lock
				173	* is dropped before returning.
				174	*
				175	* ip -- the inode being locked
				176	* lock_flags -- this parameter indicates the inode's locks to be
				177	* to be locked. See the comment for xfs_ilock() for a list
				178	* of valid values.
				179	*/
				180	int
				181	xfs_ilock_nowait(
				182	xfs_inode_t *ip,
				183	uint lock_flags)
				184	{
				185	trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
				186
				187	/*
				188	* You can't set both SHARED and EXCL for the same lock,
				189	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				190	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				191	*/
				192	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				193	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				194	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				195	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				196	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				197
				198	if (lock_flags & XFS_IOLOCK_EXCL) {
				199	if (!mrtryupdate(&ip->i_iolock))
				200	goto out;
				201	} else if (lock_flags & XFS_IOLOCK_SHARED) {
				202	if (!mrtryaccess(&ip->i_iolock))
				203	goto out;
				204	}
				205	if (lock_flags & XFS_ILOCK_EXCL) {
				206	if (!mrtryupdate(&ip->i_lock))
				207	goto out_undo_iolock;
				208	} else if (lock_flags & XFS_ILOCK_SHARED) {
				209	if (!mrtryaccess(&ip->i_lock))
				210	goto out_undo_iolock;
				211	}
				212	return 1;
				213
				214	out_undo_iolock:
				215	if (lock_flags & XFS_IOLOCK_EXCL)
				216	mrunlock_excl(&ip->i_iolock);
				217	else if (lock_flags & XFS_IOLOCK_SHARED)
				218	mrunlock_shared(&ip->i_iolock);
				219	out:
				220	return 0;
				221	}
				222
				223	/*
				224	* xfs_iunlock() is used to drop the inode locks acquired with
				225	* xfs_ilock() and xfs_ilock_nowait(). The caller must pass
				226	* in the flags given to xfs_ilock() or xfs_ilock_nowait() so
				227	* that we know which locks to drop.
				228	*
				229	* ip -- the inode being unlocked
				230	* lock_flags -- this parameter indicates the inode's locks to be
				231	* to be unlocked. See the comment for xfs_ilock() for a list
				232	* of valid values for this parameter.
				233	*
				234	*/
				235	void
				236	xfs_iunlock(
				237	xfs_inode_t *ip,
				238	uint lock_flags)
				239	{
				240	/*
				241	* You can't set both SHARED and EXCL for the same lock,
				242	* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
				243	* and XFS_ILOCK_EXCL are valid values to set in lock_flags.
				244	*/
				245	ASSERT((lock_flags & (XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL)) !=
				246	(XFS_IOLOCK_SHARED \| XFS_IOLOCK_EXCL));
				247	ASSERT((lock_flags & (XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL)) !=
				248	(XFS_ILOCK_SHARED \| XFS_ILOCK_EXCL));
				249	ASSERT((lock_flags & ~(XFS_LOCK_MASK \| XFS_LOCK_DEP_MASK)) == 0);
				250	ASSERT(lock_flags != 0);
				251
				252	if (lock_flags & XFS_IOLOCK_EXCL)
				253	mrunlock_excl(&ip->i_iolock);
				254	else if (lock_flags & XFS_IOLOCK_SHARED)
				255	mrunlock_shared(&ip->i_iolock);
				256
				257	if (lock_flags & XFS_ILOCK_EXCL)
				258	mrunlock_excl(&ip->i_lock);
				259	else if (lock_flags & XFS_ILOCK_SHARED)
				260	mrunlock_shared(&ip->i_lock);
				261
				262	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
				263	}
				264
				265	/*
				266	* give up write locks. the i/o lock cannot be held nested
				267	* if it is being demoted.
				268	*/
				269	void
				270	xfs_ilock_demote(
				271	xfs_inode_t *ip,
				272	uint lock_flags)
				273	{
				274	ASSERT(lock_flags & (XFS_IOLOCK_EXCL\|XFS_ILOCK_EXCL));
				275	ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL\|XFS_ILOCK_EXCL)) == 0);
				276
				277	if (lock_flags & XFS_ILOCK_EXCL)
				278	mrdemote(&ip->i_lock);
				279	if (lock_flags & XFS_IOLOCK_EXCL)
				280	mrdemote(&ip->i_iolock);
				281
				282	trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
				283	}
				284
#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Report whether one of the inode's locks is currently held.  Only built
 * when DEBUG or XFS_WARN is defined.
 *
 * Only one lock is examined per call: if any ILOCK flag is present the
 * answer is about i_lock and IOLOCK flags are ignored; otherwise the IOLOCK
 * flags select i_iolock.
 *
 * For the lock examined:
 *  - if the SHARED flag is set (alone or together with EXCL), the result is
 *    rwsem_is_locked() on the underlying mr_lock;
 *  - if only the EXCL flag is set, the result is whether mr_writer is
 *    non-zero.
 *
 * Passing no ILOCK/IOLOCK flag at all trips an assert and returns 0.
 */
int
xfs_isilocked(
	xfs_inode_t		*ip,
	uint			lock_flags)
{
	if (lock_flags & (XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)) {
		if (lock_flags & XFS_ILOCK_SHARED)
			return rwsem_is_locked(&ip->i_lock.mr_lock);
		return !!ip->i_lock.mr_writer;
	}

	if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
		if (lock_flags & XFS_IOLOCK_SHARED)
			return rwsem_is_locked(&ip->i_iolock.mr_lock);
		return !!ip->i_iolock.mr_writer;
	}

	ASSERT(0);
	return 0;
}
#endif
				307
#ifdef DEBUG
/*
 * Debug-only statistics maintained by xfs_lock_inodes().
 */
int xfs_locked_n;		/* calls that needed no retry */
int xfs_small_retries;		/* calls that retried fewer than 5 times */
int xfs_middle_retries;		/* calls that retried 5 to 99 times */
int xfs_lots_retries;		/* calls that retried 100 times or more */
int xfs_lock_delays;		/* delay(1) back-offs taken while retrying */
#endif
				315
				316	/*
				317	* Bump the subclass so xfs_lock_inodes() acquires each lock with
				318	* a different value
				319	*/
				320	static inline int
				321	xfs_lock_inumorder(int lock_mode, int subclass)
				322	{
				323	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL))
				324	lock_mode \|= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
				325	if (lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL))
				326	lock_mode \|= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
				327
				328	return lock_mode;
				329	}
				330
				331	/*
				332	* The following routine will lock n inodes in exclusive mode.
				333	* We assume the caller calls us with the inodes in i_ino order.
				334	*
				335	* We need to detect deadlock where an inode that we lock
				336	* is in the AIL and we start waiting for another inode that is locked
				337	* by a thread in a long running transaction (such as truncate). This can
				338	* result in deadlock since the long running trans might need to wait
				339	* for the inode we just locked in order to push the tail and free space
				340	* in the log.
				341	*/
				342	void
				343	xfs_lock_inodes(
				344	xfs_inode_t **ips,
				345	int inodes,
				346	uint lock_mode)
				347	{
				348	int attempts = 0, i, j, try_lock;
				349	xfs_log_item_t *lp;
				350
				351	ASSERT(ips && (inodes >= 2)); /* we need at least two */
				352
				353	try_lock = 0;
				354	i = 0;
				355
				356	again:
				357	for (; i < inodes; i++) {
				358	ASSERT(ips[i]);
				359
				360	if (i && (ips[i] == ips[i-1])) /* Already locked */
				361	continue;
				362
				363	/*
				364	* If try_lock is not set yet, make sure all locked inodes
				365	* are not in the AIL.
				366	* If any are, set try_lock to be used later.
				367	*/
				368
				369	if (!try_lock) {
				370	for (j = (i - 1); j >= 0 && !try_lock; j--) {
				371	lp = (xfs_log_item_t *)ips[j]->i_itemp;
				372	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				373	try_lock++;
				374	}
				375	}
				376	}
				377
				378	/*
				379	* If any of the previous locks we have locked is in the AIL,
				380	* we must TRY to get the second and subsequent locks. If
				381	* we can't get any, we must release all we have
				382	* and try again.
				383	*/
				384
				385	if (try_lock) {
				386	/* try_lock must be 0 if i is 0. */
				387	/*
				388	* try_lock means we have an inode locked
				389	* that is in the AIL.
				390	*/
				391	ASSERT(i != 0);
				392	if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				393	attempts++;
				394
				395	/*
				396	* Unlock all previous guys and try again.
				397	* xfs_iunlock will try to push the tail
				398	* if the inode is in the AIL.
				399	*/
				400
				401	for(j = i - 1; j >= 0; j--) {
				402
				403	/*
				404	* Check to see if we've already
				405	* unlocked this one.
				406	* Not the first one going back,
				407	* and the inode ptr is the same.
				408	*/
				409	if ((j != (i - 1)) && ips[j] ==
				410	ips[j+1])
				411	continue;
				412
				413	xfs_iunlock(ips[j], lock_mode);
				414	}
				415
				416	if ((attempts % 5) == 0) {
				417	delay(1); /* Don't just spin the CPU */
				418	#ifdef DEBUG
				419	xfs_lock_delays++;
				420	#endif
				421	}
				422	i = 0;
				423	try_lock = 0;
				424	goto again;
				425	}
				426	} else {
				427	xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
				428	}
				429	}
				430
				431	#ifdef DEBUG
				432	if (attempts) {
				433	if (attempts < 5) xfs_small_retries++;
				434	else if (attempts < 100) xfs_middle_retries++;
				435	else xfs_lots_retries++;
				436	} else {
				437	xfs_locked_n++;
				438	}
				439	#endif
				440	}
				441
				442	/*
				443	* xfs_lock_two_inodes() can only be used to lock one type of lock
				444	* at a time - the iolock or the ilock, but not both at once. If
				445	* we lock both at once, lockdep will report false positives saying
				446	* we have violated locking orders.
				447	*/
				448	void
				449	xfs_lock_two_inodes(
				450	xfs_inode_t *ip0,
				451	xfs_inode_t *ip1,
				452	uint lock_mode)
				453	{
				454	xfs_inode_t *temp;
				455	int attempts = 0;
				456	xfs_log_item_t *lp;
				457
				458	if (lock_mode & (XFS_IOLOCK_SHARED\|XFS_IOLOCK_EXCL))
				459	ASSERT((lock_mode & (XFS_ILOCK_SHARED\|XFS_ILOCK_EXCL)) == 0);
				460	ASSERT(ip0->i_ino != ip1->i_ino);
				461
				462	if (ip0->i_ino > ip1->i_ino) {
				463	temp = ip0;
				464	ip0 = ip1;
				465	ip1 = temp;
				466	}
				467
				468	again:
				469	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
				470
				471	/*
				472	* If the first lock we have locked is in the AIL, we must TRY to get
				473	* the second lock. If we can't get it, we must release the first one
				474	* and try again.
				475	*/
				476	lp = (xfs_log_item_t *)ip0->i_itemp;
				477	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
				478	if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
				479	xfs_iunlock(ip0, lock_mode);
				480	if ((++attempts % 5) == 0)
				481	delay(1); /* Don't just spin the CPU */
				482	goto again;
				483	}
				484	} else {
				485	xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
				486	}
				487	}
				488
				489
Dave Chinner	fa96aca	2012-10-08 21:56:10 +1100	[diff] [blame]	490	void
				491	__xfs_iflock(
				492	struct xfs_inode *ip)
				493	{
				494	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
				495	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
				496
				497	do {
				498	prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
				499	if (xfs_isiflocked(ip))
				500	io_schedule();
				501	} while (!xfs_iflock_nowait(ip));
				502
				503	finish_wait(wq, &wait.wait);
				504	}
				505
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	506	STATIC uint
				507	_xfs_dic2xflags(
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	508	__uint16_t di_flags)
				509	{
				510	uint flags = 0;
				511
				512	if (di_flags & XFS_DIFLAG_ANY) {
				513	if (di_flags & XFS_DIFLAG_REALTIME)
				514	flags \|= XFS_XFLAG_REALTIME;
				515	if (di_flags & XFS_DIFLAG_PREALLOC)
				516	flags \|= XFS_XFLAG_PREALLOC;
				517	if (di_flags & XFS_DIFLAG_IMMUTABLE)
				518	flags \|= XFS_XFLAG_IMMUTABLE;
				519	if (di_flags & XFS_DIFLAG_APPEND)
				520	flags \|= XFS_XFLAG_APPEND;
				521	if (di_flags & XFS_DIFLAG_SYNC)
				522	flags \|= XFS_XFLAG_SYNC;
				523	if (di_flags & XFS_DIFLAG_NOATIME)
				524	flags \|= XFS_XFLAG_NOATIME;
				525	if (di_flags & XFS_DIFLAG_NODUMP)
				526	flags \|= XFS_XFLAG_NODUMP;
				527	if (di_flags & XFS_DIFLAG_RTINHERIT)
				528	flags \|= XFS_XFLAG_RTINHERIT;
				529	if (di_flags & XFS_DIFLAG_PROJINHERIT)
				530	flags \|= XFS_XFLAG_PROJINHERIT;
				531	if (di_flags & XFS_DIFLAG_NOSYMLINKS)
				532	flags \|= XFS_XFLAG_NOSYMLINKS;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	533	if (di_flags & XFS_DIFLAG_EXTSIZE)
				534	flags \|= XFS_XFLAG_EXTSIZE;
				535	if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
				536	flags \|= XFS_XFLAG_EXTSZINHERIT;
Barry Naujok	d3446ea	2006-06-09 14:54:19 +1000	[diff] [blame]	537	if (di_flags & XFS_DIFLAG_NODEFRAG)
				538	flags \|= XFS_XFLAG_NODEFRAG;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	539	if (di_flags & XFS_DIFLAG_FILESTREAM)
				540	flags \|= XFS_XFLAG_FILESTREAM;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	541	}
				542
				543	return flags;
				544	}
				545
				546	uint
				547	xfs_ip2xflags(
				548	xfs_inode_t *ip)
				549	{
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	550	xfs_icdinode_t *dic = &ip->i_d;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	551
Nathan Scott	a916e2b	2006-06-09 17:12:17 +1000	[diff] [blame]	552	return _xfs_dic2xflags(dic->di_flags) \|
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	553	(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	554	}
				555
				556	uint
				557	xfs_dic2xflags(
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	558	xfs_dinode_t *dip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	559	{
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	560	return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) \|
Christoph Hellwig	45ba598	2007-12-07 14:07:20 +1100	[diff] [blame]	561	(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	562	}
				563
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	564	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	565	* Lookups up an inode from "name". If ci_name is not NULL, then a CI match
				566	* is allowed, otherwise it has to be an exact match. If a CI match is found,
				567	* ci_name->name will point to a the actual name (caller must free) or
				568	* will be set to NULL if an exact match is found.
				569	*/
				570	int
				571	xfs_lookup(
				572	xfs_inode_t *dp,
				573	struct xfs_name *name,
				574	xfs_inode_t **ipp,
				575	struct xfs_name *ci_name)
				576	{
				577	xfs_ino_t inum;
				578	int error;
				579	uint lock_mode;
				580
				581	trace_xfs_lookup(dp, name);
				582
				583	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	584	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	585
Christoph Hellwig	309ecac	2013-12-06 12:30:09 -0800	[diff] [blame]	586	lock_mode = xfs_ilock_data_map_shared(dp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	587	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
Christoph Hellwig	01f4f32	2013-12-06 12:30:08 -0800	[diff] [blame]	588	xfs_iunlock(dp, lock_mode);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	589
				590	if (error)
				591	goto out;
				592
				593	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
				594	if (error)
				595	goto out_free_name;
				596
				597	return 0;
				598
				599	out_free_name:
				600	if (ci_name)
				601	kmem_free(ci_name->name);
				602	out:
				603	*ipp = NULL;
				604	return error;
				605	}
				606
				607	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	608	* Allocate an inode on disk and return a copy of its in-core version.
				609	* The in-core inode is locked exclusively. Set mode, nlink, and rdev
				610	* appropriately within the inode. The uid and gid for the inode are
				611	* set according to the contents of the given cred structure.
				612	*
				613	* Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
Carlos Maiolino	cd856db	2012-10-20 11:08:19 -0300	[diff] [blame]	614	* has a free inode available, call xfs_iget() to obtain the in-core
				615	* version of the allocated inode. Finally, fill in the inode and
				616	* log its initial contents. In this case, ialloc_context would be
				617	* set to NULL.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	618	*
Carlos Maiolino	cd856db	2012-10-20 11:08:19 -0300	[diff] [blame]	619	* If xfs_dialloc() does not have an available inode, it will replenish
				620	* its supply by doing an allocation. Since we can only do one
				621	* allocation within a transaction without deadlocks, we must commit
				622	* the current transaction before returning the inode itself.
				623	* In this case, therefore, we will set ialloc_context and return.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	624	* The caller should then commit the current transaction, start a new
				625	* transaction, and call xfs_ialloc() again to actually get the inode.
				626	*
				627	* To ensure that some other process does not grab the inode that
				628	* was allocated during the first call to xfs_ialloc(), this routine
				629	* also returns the [locked] bp pointing to the head of the freelist
				630	* as ialloc_context. The caller should hold this buffer across
				631	* the commit and pass it back into this routine on the second call.
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	632	*
				633	* If we are allocating quota inodes, we do not have a parent inode
				634	* to attach to or associate with (i.e. pip == NULL) because they
				635	* are not linked into the directory structure - they are attached
				636	* directly to the superblock - and so have no parent.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	637	*/
				638	int
				639	xfs_ialloc(
				640	xfs_trans_t *tp,
				641	xfs_inode_t *pip,
Al Viro	576b1d6	2011-07-26 02:50:15 -0400	[diff] [blame]	642	umode_t mode,
Nathan Scott	31b084a	2005-05-05 13:25:00 -0700	[diff] [blame]	643	xfs_nlink_t nlink,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	644	xfs_dev_t rdev,
Arkadiusz Mi?kiewicz	6743099	2010-09-26 06:10:18 +0000	[diff] [blame]	645	prid_t prid,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	646	int okalloc,
				647	xfs_buf_t **ialloc_context,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	648	xfs_inode_t **ipp)
				649	{
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	650	struct xfs_mount *mp = tp->t_mountp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	651	xfs_ino_t ino;
				652	xfs_inode_t *ip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	653	uint flags;
				654	int error;
Dave Chinner	e076b0f	2014-10-02 09:18:13 +1000	[diff] [blame]	655	struct timespec tv;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	656
				657	/*
				658	* Call the space management code to pick
				659	* the on-disk inode to be allocated.
				660	*/
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	661	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
Christoph Hellwig	0835890	2012-07-04 10:54:47 -0400	[diff] [blame]	662	ialloc_context, &ino);
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	663	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	664	return error;
Christoph Hellwig	0835890	2012-07-04 10:54:47 -0400	[diff] [blame]	665	if (*ialloc_context \|\| ino == NULLFSINO) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	666	*ipp = NULL;
				667	return 0;
				668	}
				669	ASSERT(*ialloc_context == NULL);
				670
				671	/*
				672	* Get the in-core inode with the lock held exclusively.
				673	* This is because we're setting fields here we need
				674	* to prevent others from looking at until we're done.
				675	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	676	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
Christoph Hellwig	ec3ba85	2011-02-13 13:26:42 +0000	[diff] [blame]	677	XFS_ILOCK_EXCL, &ip);
David Chinner	bf90424	2008-10-30 17:36:14 +1100	[diff] [blame]	678	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	679	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	680	ASSERT(ip != NULL);
				681
Dave Chinner	263997a	2014-05-20 07:46:40 +1000	[diff] [blame]	682	/*
				683	* We always convert v1 inodes to v2 now - we only support filesystems
				684	* with >= v2 inode capability, so there is no reason for ever leaving
				685	* an inode in v1 format.
				686	*/
				687	if (ip->i_d.di_version == 1)
				688	ip->i_d.di_version = 2;
				689
Al Viro	576b1d6	2011-07-26 02:50:15 -0400	[diff] [blame]	690	ip->i_d.di_mode = mode;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	691	ip->i_d.di_onlink = 0;
				692	ip->i_d.di_nlink = nlink;
				693	ASSERT(ip->i_d.di_nlink == nlink);
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	694	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
				695	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
Arkadiusz Mi?kiewicz	6743099	2010-09-26 06:10:18 +0000	[diff] [blame]	696	xfs_set_projid(ip, prid);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	697	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
				698
Christoph Hellwig	bd186aa	2007-08-30 17:21:12 +1000	[diff] [blame]	699	if (pip && XFS_INHERIT_GID(pip)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	700	ip->i_d.di_gid = pip->i_d.di_gid;
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	701	if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	702	ip->i_d.di_mode \|= S_ISGID;
				703	}
				704	}
				705
				706	/*
				707	* If the group ID of the new file does not match the effective group
				708	* ID or one of the supplementary group IDs, the S_ISGID bit is cleared
				709	* (and only if the irix_sgid_inherit compatibility variable is set).
				710	*/
				711	if ((irix_sgid_inherit) &&
				712	(ip->i_d.di_mode & S_ISGID) &&
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	713	(!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	714	ip->i_d.di_mode &= ~S_ISGID;
				715	}
				716
				717	ip->i_d.di_size = 0;
				718	ip->i_d.di_nextents = 0;
				719	ASSERT(ip->i_d.di_nblocks == 0);
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	720
Dave Chinner	e076b0f	2014-10-02 09:18:13 +1000	[diff] [blame]	721	tv = current_fs_time(mp->m_super);
Christoph Hellwig	dff35fd	2008-08-13 16:44:15 +1000	[diff] [blame]	722	ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
				723	ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
				724	ip->i_d.di_atime = ip->i_d.di_mtime;
				725	ip->i_d.di_ctime = ip->i_d.di_mtime;
				726
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	727	/*
				728	* di_gen will have been taken care of in xfs_iread.
				729	*/
				730	ip->i_d.di_extsize = 0;
				731	ip->i_d.di_dmevmask = 0;
				732	ip->i_d.di_dmstate = 0;
				733	ip->i_d.di_flags = 0;
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	734
				735	if (ip->i_d.di_version == 3) {
				736	ASSERT(ip->i_d.di_ino == ino);
				737	ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
				738	ip->i_d.di_crc = 0;
				739	ip->i_d.di_changecount = 1;
				740	ip->i_d.di_lsn = 0;
				741	ip->i_d.di_flags2 = 0;
				742	memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
				743	ip->i_d.di_crtime = ip->i_d.di_mtime;
				744	}
				745
				746
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	747	flags = XFS_ILOG_CORE;
				748	switch (mode & S_IFMT) {
				749	case S_IFIFO:
				750	case S_IFCHR:
				751	case S_IFBLK:
				752	case S_IFSOCK:
				753	ip->i_d.di_format = XFS_DINODE_FMT_DEV;
				754	ip->i_df.if_u2.if_rdev = rdev;
				755	ip->i_df.if_flags = 0;
				756	flags \|= XFS_ILOG_DEV;
				757	break;
				758	case S_IFREG:
				759	case S_IFDIR:
David Chinner	b11f94d	2007-07-11 11:09:33 +1000	[diff] [blame]	760	if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	761	uint di_flags = 0;
				762
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	763	if (S_ISDIR(mode)) {
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	764	if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
				765	di_flags \|= XFS_DIFLAG_RTINHERIT;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	766	if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
				767	di_flags \|= XFS_DIFLAG_EXTSZINHERIT;
				768	ip->i_d.di_extsize = pip->i_d.di_extsize;
				769	}
Dave Chinner	9336e3a	2014-10-02 09:18:40 +1000	[diff] [blame]	770	if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				771	di_flags \|= XFS_DIFLAG_PROJINHERIT;
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	772	} else if (S_ISREG(mode)) {
Christoph Hellwig	613d704	2007-10-11 17:44:08 +1000	[diff] [blame]	773	if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	774	di_flags \|= XFS_DIFLAG_REALTIME;
Nathan Scott	dd9f438	2006-01-11 15:28:28 +1100	[diff] [blame]	775	if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
				776	di_flags \|= XFS_DIFLAG_EXTSIZE;
				777	ip->i_d.di_extsize = pip->i_d.di_extsize;
				778	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	779	}
				780	if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
				781	xfs_inherit_noatime)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	782	di_flags \|= XFS_DIFLAG_NOATIME;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	783	if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
				784	xfs_inherit_nodump)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	785	di_flags \|= XFS_DIFLAG_NODUMP;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	786	if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
				787	xfs_inherit_sync)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	788	di_flags \|= XFS_DIFLAG_SYNC;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	789	if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
				790	xfs_inherit_nosymlinks)
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	791	di_flags \|= XFS_DIFLAG_NOSYMLINKS;
Barry Naujok	d3446ea	2006-06-09 14:54:19 +1000	[diff] [blame]	792	if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
				793	xfs_inherit_nodefrag)
				794	di_flags \|= XFS_DIFLAG_NODEFRAG;
David Chinner	2a82b8b	2007-07-11 11:09:12 +1000	[diff] [blame]	795	if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
				796	di_flags \|= XFS_DIFLAG_FILESTREAM;
Nathan Scott	365ca83	2005-06-21 15:39:12 +1000	[diff] [blame]	797	ip->i_d.di_flags \|= di_flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	798	}
				799	/* FALLTHROUGH */
				800	case S_IFLNK:
				801	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
				802	ip->i_df.if_flags = XFS_IFEXTENTS;
				803	ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
				804	ip->i_df.if_u1.if_extents = NULL;
				805	break;
				806	default:
				807	ASSERT(0);
				808	}
				809	/*
				810	* Attribute fork settings for new inode.
				811	*/
				812	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
				813	ip->i_d.di_anextents = 0;
				814
				815	/*
				816	* Log the new values stuffed into the inode.
				817	*/
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	818	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	819	xfs_trans_log_inode(tp, ip, flags);
				820
Nathan Scott	b83bd13	2006-06-09 16:48:30 +1000	[diff] [blame]	821	/* now that we have an i_mode we can setup inode ops and unlock */
Christoph Hellwig	41be8be	2008-08-13 16:23:13 +1000	[diff] [blame]	822	xfs_setup_inode(ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	823
				824	*ipp = ip;
				825	return 0;
				826	}
				827
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	828	/*
				829	* Allocates a new inode from disk and return a pointer to the
				830	* incore copy. This routine will internally commit the current
				831	* transaction and allocate a new one if the Space Manager needed
				832	* to do an allocation to replenish the inode free-list.
				833	*
				834	* This routine is designed to be called from xfs_create and
				835	* xfs_create_dir.
				836	*
				837	*/
				838	int
				839	xfs_dir_ialloc(
				840	xfs_trans_t *tpp, / input: current transaction;
				841	output: may be a new transaction. */
				842	xfs_inode_t dp, / directory within whose allocate
				843	the inode. */
				844	umode_t mode,
				845	xfs_nlink_t nlink,
				846	xfs_dev_t rdev,
				847	prid_t prid, /* project id */
				848	int okalloc, /* ok to allocate new space */
				849	xfs_inode_t *ipp, / pointer to inode; it will be
				850	locked. */
				851	int *committed)
				852
				853	{
				854	xfs_trans_t *tp;
				855	xfs_trans_t *ntp;
				856	xfs_inode_t *ip;
				857	xfs_buf_t *ialloc_context = NULL;
				858	int code;
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	859	void *dqinfo;
				860	uint tflags;
				861
				862	tp = *tpp;
				863	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
				864
				865	/*
				866	* xfs_ialloc will return a pointer to an incore inode if
				867	* the Space Manager has an available inode on the free
				868	* list. Otherwise, it will do an allocation and replenish
				869	* the freelist. Since we can only do one allocation per
				870	* transaction without deadlocks, we will need to commit the
				871	* current transaction and start a new one. We will then
				872	* need to call xfs_ialloc again to get the inode.
				873	*
				874	* If xfs_ialloc did an allocation to replenish the freelist,
				875	* it returns the bp containing the head of the freelist as
				876	* ialloc_context. We will hold a lock on it across the
				877	* transaction commit so that no other process can steal
				878	* the inode(s) that we've just allocated.
				879	*/
				880	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
				881	&ialloc_context, &ip);
				882
				883	/*
				884	* Return an error if we were unable to allocate a new inode.
				885	* This should only happen if we run out of space on disk or
				886	* encounter a disk error.
				887	*/
				888	if (code) {
				889	*ipp = NULL;
				890	return code;
				891	}
				892	if (!ialloc_context && !ip) {
				893	*ipp = NULL;
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	894	return -ENOSPC;
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	895	}
				896
				897	/*
				898	* If the AGI buffer is non-NULL, then we were unable to get an
				899	* inode in one operation. We need to commit the current
				900	* transaction and call xfs_ialloc() again. It is guaranteed
				901	* to succeed the second time.
				902	*/
				903	if (ialloc_context) {
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	904	struct xfs_trans_res tres;
				905
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	906	/*
				907	* Normally, xfs_trans_commit releases all the locks.
				908	* We call bhold to hang on to the ialloc_context across
				909	* the commit. Holding this buffer prevents any other
				910	* processes from doing any allocations in this
				911	* allocation group.
				912	*/
				913	xfs_trans_bhold(tp, ialloc_context);
				914	/*
				915	* Save the log reservation so we can use
				916	* them in the next transaction.
				917	*/
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	918	tres.tr_logres = xfs_trans_get_log_res(tp);
				919	tres.tr_logcount = xfs_trans_get_log_count(tp);
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	920
				921	/*
				922	* We want the quota changes to be associated with the next
				923	* transaction, NOT this one. So, detach the dqinfo from this
				924	* and attach it to the next transaction.
				925	*/
				926	dqinfo = NULL;
				927	tflags = 0;
				928	if (tp->t_dqinfo) {
				929	dqinfo = (void *)tp->t_dqinfo;
				930	tp->t_dqinfo = NULL;
				931	tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
				932	tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
				933	}
				934
				935	ntp = xfs_trans_dup(tp);
				936	code = xfs_trans_commit(tp, 0);
				937	tp = ntp;
				938	if (committed != NULL) {
				939	*committed = 1;
				940	}
				941	/*
				942	* If we get an error during the commit processing,
				943	* release the buffer that is still held and return
				944	* to the caller.
				945	*/
				946	if (code) {
				947	xfs_buf_relse(ialloc_context);
				948	if (dqinfo) {
				949	tp->t_dqinfo = dqinfo;
				950	xfs_trans_free_dqinfo(tp);
				951	}
				952	*tpp = ntp;
				953	*ipp = NULL;
				954	return code;
				955	}
				956
				957	/*
				958	* transaction commit worked ok so we can drop the extra ticket
				959	* reference that we gained in xfs_trans_dup()
				960	*/
				961	xfs_log_ticket_put(tp->t_ticket);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	962	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
				963	code = xfs_trans_reserve(tp, &tres, 0, 0);
				964
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	965	/*
				966	* Re-attach the quota info that we detached from prev trx.
				967	*/
				968	if (dqinfo) {
				969	tp->t_dqinfo = dqinfo;
				970	tp->t_flags \|= tflags;
				971	}
				972
				973	if (code) {
				974	xfs_buf_relse(ialloc_context);
				975	*tpp = ntp;
				976	*ipp = NULL;
				977	return code;
				978	}
				979	xfs_trans_bjoin(tp, ialloc_context);
				980
				981	/*
				982	* Call ialloc again. Since we've locked out all
				983	* other allocations in this allocation group,
				984	* this call should always succeed.
				985	*/
				986	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
				987	okalloc, &ialloc_context, &ip);
				988
				989	/*
				990	* If we get an error at this point, return to the caller
				991	* so that the current transaction can be aborted.
				992	*/
				993	if (code) {
				994	*tpp = tp;
				995	*ipp = NULL;
				996	return code;
				997	}
				998	ASSERT(!ialloc_context && ip);
				999
				1000	} else {
				1001	if (committed != NULL)
				1002	*committed = 0;
				1003	}
				1004
				1005	*ipp = ip;
				1006	*tpp = tp;
				1007
				1008	return 0;
				1009	}
				1010
				1011	/*
				1012	* Decrement the link count on an inode & log the change.
				1013	* If this causes the link count to go to zero, initiate the
				1014	* logging activity required to truncate a file.
				1015	*/
				1016	int /* error */
				1017	xfs_droplink(
				1018	xfs_trans_t *tp,
				1019	xfs_inode_t *ip)
				1020	{
				1021	int error;
				1022
				1023	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				1024
				1025	ASSERT (ip->i_d.di_nlink > 0);
				1026	ip->i_d.di_nlink--;
				1027	drop_nlink(VFS_I(ip));
				1028	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1029
				1030	error = 0;
				1031	if (ip->i_d.di_nlink == 0) {
				1032	/*
				1033	* We're dropping the last link to this file.
				1034	* Move the on-disk inode to the AGI unlinked list.
				1035	* From xfs_inactive() we will pull the inode from
				1036	* the list and free it.
				1037	*/
				1038	error = xfs_iunlink(tp, ip);
				1039	}
				1040	return error;
				1041	}
				1042
				1043	/*
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	1044	* Increment the link count on an inode & log the change.
				1045	*/
				1046	int
				1047	xfs_bumplink(
				1048	xfs_trans_t *tp,
				1049	xfs_inode_t *ip)
				1050	{
				1051	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
				1052
Dave Chinner	263997a	2014-05-20 07:46:40 +1000	[diff] [blame]	1053	ASSERT(ip->i_d.di_version > 1);
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	1054	ASSERT(ip->i_d.di_nlink > 0 \|\| (VFS_I(ip)->i_state & I_LINKABLE));
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	1055	ip->i_d.di_nlink++;
				1056	inc_nlink(VFS_I(ip));
Dave Chinner	e546cb7	2013-08-12 20:49:47 +1000	[diff] [blame]	1057	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1058	return 0;
				1059	}
				1060
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1061	int
				1062	xfs_create(
				1063	xfs_inode_t *dp,
				1064	struct xfs_name *name,
				1065	umode_t mode,
				1066	xfs_dev_t rdev,
				1067	xfs_inode_t **ipp)
				1068	{
				1069	int is_dir = S_ISDIR(mode);
				1070	struct xfs_mount *mp = dp->i_mount;
				1071	struct xfs_inode *ip = NULL;
				1072	struct xfs_trans *tp = NULL;
				1073	int error;
				1074	xfs_bmap_free_t free_list;
				1075	xfs_fsblock_t first_block;
				1076	bool unlock_dp_on_error = false;
				1077	uint cancel_flags;
				1078	int committed;
				1079	prid_t prid;
				1080	struct xfs_dquot *udqp = NULL;
				1081	struct xfs_dquot *gdqp = NULL;
				1082	struct xfs_dquot *pdqp = NULL;
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1083	struct xfs_trans_res *tres;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1084	uint resblks;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1085
				1086	trace_xfs_create(dp, name);
				1087
				1088	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1089	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1090
Zhi Yong Wu	163467d	2013-12-18 08:22:39 +0800	[diff] [blame]	1091	prid = xfs_get_initial_prid(dp);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1092
				1093	/*
				1094	* Make sure that we have allocated dquot(s) on disk.
				1095	*/
Dwight Engen	7aab1b2	2013-08-15 14:08:01 -0400	[diff] [blame]	1096	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
				1097	xfs_kgid_to_gid(current_fsgid()), prid,
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1098	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT,
				1099	&udqp, &gdqp, &pdqp);
				1100	if (error)
				1101	return error;
				1102
				1103	if (is_dir) {
				1104	rdev = 0;
				1105	resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1106	tres = &M_RES(mp)->tr_mkdir;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1107	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
				1108	} else {
				1109	resblks = XFS_CREATE_SPACE_RES(mp, name->len);
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1110	tres = &M_RES(mp)->tr_create;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1111	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
				1112	}
				1113
				1114	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1115
				1116	/*
				1117	* Initially assume that the file does not exist and
				1118	* reserve the resources for that case. If that is not
				1119	* the case we'll drop the one we have and get a more
				1120	* appropriate transaction later.
				1121	*/
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1122	error = xfs_trans_reserve(tp, tres, resblks, 0);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1123	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1124	/* flush outstanding delalloc blocks and retry */
				1125	xfs_flush_inodes(mp);
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1126	error = xfs_trans_reserve(tp, tres, resblks, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1127	}
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1128	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1129	/* No space at all so try a "no-allocation" reservation */
				1130	resblks = 0;
Brian Foster	062647a	2014-11-28 14:00:16 +1100	[diff] [blame]	1131	error = xfs_trans_reserve(tp, tres, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1132	}
				1133	if (error) {
				1134	cancel_flags = 0;
				1135	goto out_trans_cancel;
				1136	}
				1137
				1138	xfs_ilock(dp, XFS_ILOCK_EXCL \| XFS_ILOCK_PARENT);
				1139	unlock_dp_on_error = true;
				1140
				1141	xfs_bmap_init(&free_list, &first_block);
				1142
				1143	/*
				1144	* Reserve disk quota and the inode.
				1145	*/
				1146	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
				1147	pdqp, resblks, 1, 0);
				1148	if (error)
				1149	goto out_trans_cancel;
				1150
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	1151	if (!resblks) {
				1152	error = xfs_dir_canenter(tp, dp, name);
				1153	if (error)
				1154	goto out_trans_cancel;
				1155	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1156
				1157	/*
				1158	* A newly created regular or special file just has one directory
				1159	* entry pointing to them, but a directory also the "." entry
				1160	* pointing to itself.
				1161	*/
				1162	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
				1163	prid, resblks > 0, &ip, &committed);
				1164	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1165	if (error == -ENOSPC)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1166	goto out_trans_cancel;
				1167	goto out_trans_abort;
				1168	}
				1169
				1170	/*
				1171	* Now we join the directory inode to the transaction. We do not do it
				1172	* earlier because xfs_dir_ialloc might commit the previous transaction
				1173	* (and release all the locks). An error from here on will result in
				1174	* the transaction cancel unlocking dp so don't do it explicitly in the
				1175	* error path.
				1176	*/
				1177	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				1178	unlock_dp_on_error = false;
				1179
				1180	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
				1181	&first_block, &free_list, resblks ?
				1182	resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
				1183	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1184	ASSERT(error != -ENOSPC);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1185	goto out_trans_abort;
				1186	}
				1187	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1188	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				1189
				1190	if (is_dir) {
				1191	error = xfs_dir_init(tp, ip, dp);
				1192	if (error)
				1193	goto out_bmap_cancel;
				1194
				1195	error = xfs_bumplink(tp, dp);
				1196	if (error)
				1197	goto out_bmap_cancel;
				1198	}
				1199
				1200	/*
				1201	* If this is a synchronous mount, make sure that the
				1202	* create transaction goes to disk before returning to
				1203	* the user.
				1204	*/
				1205	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC))
				1206	xfs_trans_set_sync(tp);
				1207
				1208	/*
				1209	* Attach the dquot(s) to the inodes and modify them incore.
				1210	* These ids of the inode couldn't have changed since the new
				1211	* inode has been locked ever since it was created.
				1212	*/
				1213	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
				1214
				1215	error = xfs_bmap_finish(&tp, &free_list, &committed);
				1216	if (error)
				1217	goto out_bmap_cancel;
				1218
				1219	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1220	if (error)
				1221	goto out_release_inode;
				1222
				1223	xfs_qm_dqrele(udqp);
				1224	xfs_qm_dqrele(gdqp);
				1225	xfs_qm_dqrele(pdqp);
				1226
				1227	*ipp = ip;
				1228	return 0;
				1229
				1230	out_bmap_cancel:
				1231	xfs_bmap_cancel(&free_list);
				1232	out_trans_abort:
				1233	cancel_flags \|= XFS_TRANS_ABORT;
				1234	out_trans_cancel:
				1235	xfs_trans_cancel(tp, cancel_flags);
				1236	out_release_inode:
				1237	/*
				1238	* Wait until after the current transaction is aborted to
				1239	* release the inode. This prevents recursive transactions
				1240	* and deadlocks from xfs_inactive.
				1241	*/
				1242	if (ip)
				1243	IRELE(ip);
				1244
				1245	xfs_qm_dqrele(udqp);
				1246	xfs_qm_dqrele(gdqp);
				1247	xfs_qm_dqrele(pdqp);
				1248
				1249	if (unlock_dp_on_error)
				1250	xfs_iunlock(dp, XFS_ILOCK_EXCL);
				1251	return error;
				1252	}
				1253
				1254	int
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1255	xfs_create_tmpfile(
				1256	struct xfs_inode *dp,
				1257	struct dentry *dentry,
Brian Foster	330033d	2014-04-17 08:15:30 +1000	[diff] [blame]	1258	umode_t mode,
				1259	struct xfs_inode **ipp)
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1260	{
				1261	struct xfs_mount *mp = dp->i_mount;
				1262	struct xfs_inode *ip = NULL;
				1263	struct xfs_trans *tp = NULL;
				1264	int error;
				1265	uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1266	prid_t prid;
				1267	struct xfs_dquot *udqp = NULL;
				1268	struct xfs_dquot *gdqp = NULL;
				1269	struct xfs_dquot *pdqp = NULL;
				1270	struct xfs_trans_res *tres;
				1271	uint resblks;
				1272
				1273	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1274	return -EIO;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1275
				1276	prid = xfs_get_initial_prid(dp);
				1277
				1278	/*
				1279	* Make sure that we have allocated dquot(s) on disk.
				1280	*/
				1281	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
				1282	xfs_kgid_to_gid(current_fsgid()), prid,
				1283	XFS_QMOPT_QUOTALL \| XFS_QMOPT_INHERIT,
				1284	&udqp, &gdqp, &pdqp);
				1285	if (error)
				1286	return error;
				1287
				1288	resblks = XFS_IALLOC_SPACE_RES(mp);
				1289	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
				1290
				1291	tres = &M_RES(mp)->tr_create_tmpfile;
				1292	error = xfs_trans_reserve(tp, tres, resblks, 0);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1293	if (error == -ENOSPC) {
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1294	/* No space at all so try a "no-allocation" reservation */
				1295	resblks = 0;
				1296	error = xfs_trans_reserve(tp, tres, 0, 0);
				1297	}
				1298	if (error) {
				1299	cancel_flags = 0;
				1300	goto out_trans_cancel;
				1301	}
				1302
				1303	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
				1304	pdqp, resblks, 1, 0);
				1305	if (error)
				1306	goto out_trans_cancel;
				1307
				1308	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
				1309	prid, resblks > 0, &ip, NULL);
				1310	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1311	if (error == -ENOSPC)
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1312	goto out_trans_cancel;
				1313	goto out_trans_abort;
				1314	}
				1315
				1316	if (mp->m_flags & XFS_MOUNT_WSYNC)
				1317	xfs_trans_set_sync(tp);
				1318
				1319	/*
				1320	* Attach the dquot(s) to the inodes and modify them incore.
				1321	* These ids of the inode couldn't have changed since the new
				1322	* inode has been locked ever since it was created.
				1323	*/
				1324	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
				1325
				1326	ip->i_d.di_nlink--;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1327	error = xfs_iunlink(tp, ip);
				1328	if (error)
				1329	goto out_trans_abort;
				1330
				1331	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1332	if (error)
				1333	goto out_release_inode;
				1334
				1335	xfs_qm_dqrele(udqp);
				1336	xfs_qm_dqrele(gdqp);
				1337	xfs_qm_dqrele(pdqp);
				1338
Brian Foster	330033d	2014-04-17 08:15:30 +1000	[diff] [blame]	1339	*ipp = ip;
Zhi Yong Wu	99b6436	2013-12-18 08:22:40 +0800	[diff] [blame]	1340	return 0;
				1341
				1342	out_trans_abort:
				1343	cancel_flags \|= XFS_TRANS_ABORT;
				1344	out_trans_cancel:
				1345	xfs_trans_cancel(tp, cancel_flags);
				1346	out_release_inode:
				1347	/*
				1348	* Wait until after the current transaction is aborted to
				1349	* release the inode. This prevents recursive transactions
				1350	* and deadlocks from xfs_inactive.
				1351	*/
				1352	if (ip)
				1353	IRELE(ip);
				1354
				1355	xfs_qm_dqrele(udqp);
				1356	xfs_qm_dqrele(gdqp);
				1357	xfs_qm_dqrele(pdqp);
				1358
				1359	return error;
				1360	}
				1361
				1362	int
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1363	xfs_link(
				1364	xfs_inode_t *tdp,
				1365	xfs_inode_t *sip,
				1366	struct xfs_name *target_name)
				1367	{
				1368	xfs_mount_t *mp = tdp->i_mount;
				1369	xfs_trans_t *tp;
				1370	int error;
				1371	xfs_bmap_free_t free_list;
				1372	xfs_fsblock_t first_block;
				1373	int cancel_flags;
				1374	int committed;
				1375	int resblks;
				1376
				1377	trace_xfs_link(tdp, target_name);
				1378
				1379	ASSERT(!S_ISDIR(sip->i_d.di_mode));
				1380
				1381	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1382	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1383
				1384	error = xfs_qm_dqattach(sip, 0);
				1385	if (error)
				1386	goto std_return;
				1387
				1388	error = xfs_qm_dqattach(tdp, 0);
				1389	if (error)
				1390	goto std_return;
				1391
				1392	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
				1393	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				1394	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1395	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1396	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1397	resblks = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1398	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1399	}
				1400	if (error) {
				1401	cancel_flags = 0;
				1402	goto error_return;
				1403	}
				1404
				1405	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
				1406
				1407	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
				1408	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
				1409
				1410	/*
				1411	* If we are using project inheritance, we only allow hard link
				1412	* creation in our tree when the project IDs are the same; else
				1413	* the tree quota mechanism could be circumvented.
				1414	*/
				1415	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
				1416	(xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1417	error = -EXDEV;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1418	goto error_return;
				1419	}
				1420
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	1421	if (!resblks) {
				1422	error = xfs_dir_canenter(tp, tdp, target_name);
				1423	if (error)
				1424	goto error_return;
				1425	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1426
				1427	xfs_bmap_init(&free_list, &first_block);
				1428
Zhi Yong Wu	ab29743	2013-12-18 08:22:41 +0800	[diff] [blame]	1429	if (sip->i_d.di_nlink == 0) {
				1430	error = xfs_iunlink_remove(tp, sip);
				1431	if (error)
				1432	goto abort_return;
				1433	}
				1434
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1435	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
				1436	&first_block, &free_list, resblks);
				1437	if (error)
				1438	goto abort_return;
				1439	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				1440	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
				1441
				1442	error = xfs_bumplink(tp, sip);
				1443	if (error)
				1444	goto abort_return;
				1445
				1446	/*
				1447	* If this is a synchronous mount, make sure that the
				1448	* link transaction goes to disk before returning to
				1449	* the user.
				1450	*/
				1451	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				1452	xfs_trans_set_sync(tp);
				1453	}
				1454
				1455	error = xfs_bmap_finish (&tp, &free_list, &committed);
				1456	if (error) {
				1457	xfs_bmap_cancel(&free_list);
				1458	goto abort_return;
				1459	}
				1460
				1461	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1462
				1463	abort_return:
				1464	cancel_flags \|= XFS_TRANS_ABORT;
				1465	error_return:
				1466	xfs_trans_cancel(tp, cancel_flags);
				1467	std_return:
				1468	return error;
				1469	}
				1470
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1471	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1472	* Free up the underlying blocks past new_size. The new size must be smaller
				1473	* than the current size. This routine can be used both for the attribute and
				1474	* data fork, and does not modify the inode size, which is left to the caller.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1475	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1476	* The transaction passed to this routine must have made a permanent log
				1477	* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
				1478	* given transaction and start new ones, so make sure everything involved in
				1479	* the transaction is tidy before calling here. Some transaction will be
				1480	* returned to the caller to be committed. The incoming transaction must
				1481	* already include the inode, and both inode locks must be held exclusively.
				1482	* The inode must also be "held" within the transaction. On return the inode
				1483	* will be "held" within the returned transaction. This routine does NOT
				1484	* require any disk space to be reserved for it within the transaction.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1485	*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1486	* If we get an error, we must return with the inode locked and linked into the
				1487	* current transaction. This keeps things simple for the higher level code,
				1488	* because it always knows that the inode is locked and held in the transaction
				1489	* that returns to it whether errors occur or not. We don't mark the inode
				1490	* dirty on error so that transactions can be easily aborted if possible.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1491	*/
				1492	int
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1493	xfs_itruncate_extents(
				1494	struct xfs_trans **tpp,
				1495	struct xfs_inode *ip,
				1496	int whichfork,
				1497	xfs_fsize_t new_size)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1498	{
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1499	struct xfs_mount *mp = ip->i_mount;
				1500	struct xfs_trans tp = tpp;
				1501	struct xfs_trans *ntp;
				1502	xfs_bmap_free_t free_list;
				1503	xfs_fsblock_t first_block;
				1504	xfs_fileoff_t first_unmap_block;
				1505	xfs_fileoff_t last_block;
				1506	xfs_filblks_t unmap_len;
				1507	int committed;
				1508	int error = 0;
				1509	int done = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1510
Christoph Hellwig	0b56185	2012-07-04 11:13:31 -0400	[diff] [blame]	1511	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
				1512	ASSERT(!atomic_read(&VFS_I(ip)->i_count) \|\|
				1513	xfs_isilocked(ip, XFS_IOLOCK_EXCL));
Christoph Hellwig	ce7ae151	2011-12-18 20:00:11 +0000	[diff] [blame]	1514	ASSERT(new_size <= XFS_ISIZE(ip));
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1515	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1516	ASSERT(ip->i_itemp != NULL);
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	1517	ASSERT(ip->i_itemp->ili_lock_flags == 0);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1518	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1519
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1520	trace_xfs_itruncate_extents_start(ip, new_size);
				1521
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1522	/*
				1523	* Since it is possible for space to become allocated beyond
				1524	* the end of the file (in a crash where the space is allocated
				1525	* but the inode size is not yet updated), simply remove any
				1526	* blocks which show up between the new EOF and the maximum
				1527	* possible file size. If the first block to be removed is
				1528	* beyond the maximum file size (ie it is the same as last_block),
				1529	* then there is nothing to do.
				1530	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1531	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
Dave Chinner	3297238	2012-06-08 15:44:54 +1000	[diff] [blame]	1532	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1533	if (first_unmap_block == last_block)
				1534	return 0;
				1535
				1536	ASSERT(first_unmap_block < last_block);
				1537	unmap_len = last_block - first_unmap_block + 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1538	while (!done) {
Eric Sandeen	9d87c31	2009-01-14 23:22:07 -0600	[diff] [blame]	1539	xfs_bmap_init(&free_list, &first_block);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1540	error = xfs_bunmapi(tp, ip,
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	1541	first_unmap_block, unmap_len,
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1542	xfs_bmapi_aflag(whichfork),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1543	XFS_ITRUNC_MAX_EXTENTS,
Olaf Weber	3e57ecf	2006-06-09 14:48:12 +1000	[diff] [blame]	1544	&first_block, &free_list,
Christoph Hellwig	b4e9181	2010-06-23 18:11:15 +1000	[diff] [blame]	1545	&done);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1546	if (error)
				1547	goto out_bmap_cancel;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1548
				1549	/*
				1550	* Duplicate the transaction that has the permanent
				1551	* reservation and commit the old transaction.
				1552	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1553	error = xfs_bmap_finish(&tp, &free_list, &committed);
Christoph Hellwig	898621d	2010-06-24 11:36:58 +1000	[diff] [blame]	1554	if (committed)
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	1555	xfs_trans_ijoin(tp, ip, 0);
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1556	if (error)
				1557	goto out_bmap_cancel;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1558
				1559	if (committed) {
				1560	/*
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1561	* Mark the inode dirty so it will be logged and
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	1562	* moved forward in the log as part of every commit.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1563	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1564	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1565	}
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1566
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1567	ntp = xfs_trans_dup(tp);
				1568	error = xfs_trans_commit(tp, 0);
				1569	tp = ntp;
David Chinner	e5720ee	2008-04-10 12:21:18 +1000	[diff] [blame]	1570
Christoph Hellwig	ddc3415	2011-09-19 15:00:54 +0000	[diff] [blame]	1571	xfs_trans_ijoin(tp, ip, 0);
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1572
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1573	if (error)
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1574	goto out;
				1575
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1576	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1577	* Transaction commit worked ok so we can drop the extra ticket
Dave Chinner	cc09c0d	2008-11-17 17:37:10 +1100	[diff] [blame]	1578	* reference that we gained in xfs_trans_dup()
				1579	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1580	xfs_log_ticket_put(tp->t_ticket);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1581	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
David Chinner	f648505	2008-04-17 16:50:04 +1000	[diff] [blame]	1582	if (error)
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1583	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1584	}
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1585
Christoph Hellwig	673e8e5	2011-12-18 20:00:04 +0000	[diff] [blame]	1586	/*
				1587	* Always re-log the inode so that our permanent transaction can keep
				1588	* on rolling it forward in the log.
				1589	*/
				1590	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1591
				1592	trace_xfs_itruncate_extents_end(ip, new_size);
				1593
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1594	out:
				1595	*tpp = tp;
				1596	return error;
				1597	out_bmap_cancel:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1598	/*
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1599	* If the bunmapi call encounters an error, return to the caller where
				1600	* the transaction can be properly aborted. We just need to make sure
				1601	* we're not holding any resources that we were not when we came in.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1602	*/
Christoph Hellwig	8f04c47	2011-07-08 14:34:34 +0200	[diff] [blame]	1603	xfs_bmap_cancel(&free_list);
				1604	goto out;
				1605	}
				1606
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1607	int
				1608	xfs_release(
				1609	xfs_inode_t *ip)
				1610	{
				1611	xfs_mount_t *mp = ip->i_mount;
				1612	int error;
				1613
				1614	if (!S_ISREG(ip->i_d.di_mode) \|\| (ip->i_d.di_mode == 0))
				1615	return 0;
				1616
				1617	/* If this is a read-only mount, don't do this (would generate I/O) */
				1618	if (mp->m_flags & XFS_MOUNT_RDONLY)
				1619	return 0;
				1620
				1621	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1622	int truncated;
				1623
				1624	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1625	* If we previously truncated this file and removed old data
				1626	* in the process, we want to initiate "early" writeout on
				1627	* the last close. This is an attempt to combat the notorious
				1628	* NULL files problem which is particularly noticeable from a
				1629	* truncate down, buffered (re-)write (delalloc), followed by
				1630	* a crash. What we are effectively doing here is
				1631	* significantly reducing the time window where we'd otherwise
				1632	* be exposed to that problem.
				1633	*/
				1634	truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
				1635	if (truncated) {
				1636	xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
Dave Chinner	eac152b	2014-08-04 13:22:49 +1000	[diff] [blame]	1637	if (ip->i_delayed_blks > 0) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1638	error = filemap_flush(VFS_I(ip)->i_mapping);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1639	if (error)
				1640	return error;
				1641	}
				1642	}
				1643	}
				1644
				1645	if (ip->i_d.di_nlink == 0)
				1646	return 0;
				1647
				1648	if (xfs_can_free_eofblocks(ip, false)) {
				1649
				1650	/*
				1651	* If we can't get the iolock just skip truncating the blocks
				1652	* past EOF because we could deadlock with the mmap_sem
				1653	* otherwise. We'll get another chance to drop them once the
				1654	* last reference to the inode is dropped, so we'll never leak
				1655	* blocks permanently.
				1656	*
				1657	* Further, check if the inode is being opened, written and
				1658	* closed frequently and we have delayed allocation blocks
				1659	* outstanding (e.g. streaming writes from the NFS server),
				1660	* truncating the blocks past EOF will cause fragmentation to
				1661	* occur.
				1662	*
				1663	* In this case don't do the truncation, either, but we have to
				1664	* be careful how we detect this case. Blocks beyond EOF show
				1665	* up as i_delayed_blks even when the inode is clean, so we
				1666	* need to truncate them away first before checking for a dirty
				1667	* release. Hence on the first dirty close we will still remove
				1668	* the speculative allocation, but after that we will leave it
				1669	* in place.
				1670	*/
				1671	if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
				1672	return 0;
				1673
				1674	error = xfs_free_eofblocks(mp, ip, true);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1675	if (error && error != -EAGAIN)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1676	return error;
				1677
				1678	/* delalloc blocks after truncation means it really is dirty */
				1679	if (ip->i_delayed_blks)
				1680	xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
				1681	}
				1682	return 0;
				1683	}
				1684
				1685	/*
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1686	* xfs_inactive_truncate
				1687	*
				1688	* Called to perform a truncate when an inode becomes unlinked.
				1689	*/
				1690	STATIC int
				1691	xfs_inactive_truncate(
				1692	struct xfs_inode *ip)
				1693	{
				1694	struct xfs_mount *mp = ip->i_mount;
				1695	struct xfs_trans *tp;
				1696	int error;
				1697
				1698	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
				1699	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
				1700	if (error) {
				1701	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1702	xfs_trans_cancel(tp, 0);
				1703	return error;
				1704	}
				1705
				1706	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1707	xfs_trans_ijoin(tp, ip, 0);
				1708
				1709	/*
				1710	* Log the inode size first to prevent stale data exposure in the event
				1711	* of a system crash before the truncate completes. See the related
				1712	* comment in xfs_setattr_size() for details.
				1713	*/
				1714	ip->i_d.di_size = 0;
				1715	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				1716
				1717	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
				1718	if (error)
				1719	goto error_trans_cancel;
				1720
				1721	ASSERT(ip->i_d.di_nextents == 0);
				1722
				1723	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1724	if (error)
				1725	goto error_unlock;
				1726
				1727	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1728	return 0;
				1729
				1730	error_trans_cancel:
				1731	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES \| XFS_TRANS_ABORT);
				1732	error_unlock:
				1733	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1734	return error;
				1735	}
				1736
				1737	/*
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1738	* xfs_inactive_ifree()
				1739	*
				1740	* Perform the inode free when an inode is unlinked.
				1741	*/
				1742	STATIC int
				1743	xfs_inactive_ifree(
				1744	struct xfs_inode *ip)
				1745	{
				1746	xfs_bmap_free_t free_list;
				1747	xfs_fsblock_t first_block;
				1748	int committed;
				1749	struct xfs_mount *mp = ip->i_mount;
				1750	struct xfs_trans *tp;
				1751	int error;
				1752
				1753	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
Brian Foster	9d43b18	2014-04-24 16:00:52 +1000	[diff] [blame]	1754
				1755	/*
				1756	* The ifree transaction might need to allocate blocks for record
				1757	* insertion to the finobt. We don't want to fail here at ENOSPC, so
				1758	* allow ifree to dip into the reserved block pool if necessary.
				1759	*
				1760	* Freeing large sets of inodes generally means freeing inode chunks,
				1761	* directory and file data blocks, so this should be relatively safe.
				1762	* Only under severe circumstances should it be possible to free enough
				1763	* inodes to exhaust the reserve block pool via finobt expansion while
				1764	* at the same time not creating free space in the filesystem.
				1765	*
				1766	* Send a warning if the reservation does happen to fail, as the inode
				1767	* now remains allocated and sits on the unlinked list until the fs is
				1768	* repaired.
				1769	*/
				1770	tp->t_flags \|= XFS_TRANS_RESERVE;
				1771	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
				1772	XFS_IFREE_SPACE_RES(mp), 0);
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1773	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	1774	if (error == -ENOSPC) {
Brian Foster	9d43b18	2014-04-24 16:00:52 +1000	[diff] [blame]	1775	xfs_warn_ratelimited(mp,
				1776	"Failed to remove inode(s) from unlinked list. "
				1777	"Please free space, unmount and run xfs_repair.");
				1778	} else {
				1779	ASSERT(XFS_FORCED_SHUTDOWN(mp));
				1780	}
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1781	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
				1782	return error;
				1783	}
				1784
				1785	xfs_ilock(ip, XFS_ILOCK_EXCL);
				1786	xfs_trans_ijoin(tp, ip, 0);
				1787
				1788	xfs_bmap_init(&free_list, &first_block);
				1789	error = xfs_ifree(tp, ip, &free_list);
				1790	if (error) {
				1791	/*
				1792	* If we fail to free the inode, shut down. The cancel
				1793	* might do that, we need to make sure. Otherwise the
				1794	* inode might be lost for a long time or forever.
				1795	*/
				1796	if (!XFS_FORCED_SHUTDOWN(mp)) {
				1797	xfs_notice(mp, "%s: xfs_ifree returned error %d",
				1798	__func__, error);
				1799	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
				1800	}
				1801	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES\|XFS_TRANS_ABORT);
				1802	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1803	return error;
				1804	}
				1805
				1806	/*
				1807	* Credit the quota account(s). The inode is gone.
				1808	*/
				1809	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
				1810
				1811	/*
				1812	* Just ignore errors at this point. There is nothing we can
				1813	* do except to try to keep going. Make sure it's not a silent
				1814	* error.
				1815	*/
				1816	error = xfs_bmap_finish(&tp, &free_list, &committed);
				1817	if (error)
				1818	xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
				1819	__func__, error);
				1820	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				1821	if (error)
				1822	xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
				1823	__func__, error);
				1824
				1825	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				1826	return 0;
				1827	}
				1828
				1829	/*
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1830	* xfs_inactive
				1831	*
				1832	* This is called when the vnode reference count for the vnode
				1833	* goes to zero. If the file has been unlinked, then it must
				1834	* now be truncated. Also, we clear all of the read-ahead state
				1835	* kept for the inode here since the file is now closed.
				1836	*/
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1837	void
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1838	xfs_inactive(
				1839	xfs_inode_t *ip)
				1840	{
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1841	struct xfs_mount *mp;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	1842	int error;
				1843	int truncate = 0;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1844
				1845	/*
				1846	* If the inode is already free, then there can be nothing
				1847	* to clean up here.
				1848	*/
Ben Myers	d948709	2013-09-10 18:11:22 -0500	[diff] [blame]	1849	if (ip->i_d.di_mode == 0) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1850	ASSERT(ip->i_df.if_real_bytes == 0);
				1851	ASSERT(ip->i_df.if_broot_bytes == 0);
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1852	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1853	}
				1854
				1855	mp = ip->i_mount;
				1856
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1857	/* If this is a read-only mount, don't do this (would generate I/O) */
				1858	if (mp->m_flags & XFS_MOUNT_RDONLY)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1859	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1860
				1861	if (ip->i_d.di_nlink != 0) {
				1862	/*
				1863	* force is true because we are evicting an inode from the
				1864	* cache. Post-eof blocks must be freed, lest we end up with
				1865	* broken free space accounting.
				1866	*/
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1867	if (xfs_can_free_eofblocks(ip, true))
				1868	xfs_free_eofblocks(mp, ip, false);
				1869
				1870	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1871	}
				1872
				1873	if (S_ISREG(ip->i_d.di_mode) &&
				1874	(ip->i_d.di_size != 0 \|\| XFS_ISIZE(ip) != 0 \|\|
				1875	ip->i_d.di_nextents > 0 \|\| ip->i_delayed_blks > 0))
				1876	truncate = 1;
				1877
				1878	error = xfs_qm_dqattach(ip, 0);
				1879	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1880	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1881
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1882	if (S_ISLNK(ip->i_d.di_mode))
Brian Foster	36b21dd	2013-09-20 11:06:09 -0400	[diff] [blame]	1883	error = xfs_inactive_symlink(ip);
Brian Foster	f7be2d7	2013-09-20 11:06:10 -0400	[diff] [blame]	1884	else if (truncate)
				1885	error = xfs_inactive_truncate(ip);
				1886	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1887	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1888
				1889	/*
				1890	* If there are attributes associated with the file then blow them away
				1891	* now. The code calls a routine that recursively deconstructs the
				1892	* attribute fork. We need to just commit the current transaction
				1893	* because we can't use it for xfs_attr_inactive().
				1894	*/
				1895	if (ip->i_d.di_anextents > 0) {
				1896	ASSERT(ip->i_d.di_forkoff != 0);
				1897
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1898	error = xfs_attr_inactive(ip);
				1899	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1900	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1901	}
				1902
				1903	if (ip->i_afp)
				1904	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
				1905
				1906	ASSERT(ip->i_d.di_anextents == 0);
				1907
				1908	/*
				1909	* Free the inode.
				1910	*/
Brian Foster	88877d2	2013-09-20 11:06:11 -0400	[diff] [blame]	1911	error = xfs_inactive_ifree(ip);
				1912	if (error)
Brian Foster	74564fb	2013-09-20 11:06:12 -0400	[diff] [blame]	1913	return;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1914
				1915	/*
				1916	* Release the dquots held by inode, if any.
				1917	*/
				1918	xfs_qm_dqdetach(ip);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	1919	}
				1920
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1921	/*
				1922	* This is called when the inode's link count goes to 0.
				1923	* We place the on-disk inode on a list in the AGI. It
				1924	* will be pulled from this list when the inode is freed.
				1925	*/
				1926	int
				1927	xfs_iunlink(
				1928	xfs_trans_t *tp,
				1929	xfs_inode_t *ip)
				1930	{
				1931	xfs_mount_t *mp;
				1932	xfs_agi_t *agi;
				1933	xfs_dinode_t *dip;
				1934	xfs_buf_t *agibp;
				1935	xfs_buf_t *ibp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1936	xfs_agino_t agino;
				1937	short bucket_index;
				1938	int offset;
				1939	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1940
				1941	ASSERT(ip->i_d.di_nlink == 0);
				1942	ASSERT(ip->i_d.di_mode != 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1943
				1944	mp = tp->t_mountp;
				1945
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1946	/*
				1947	* Get the agi buffer first. It ensures lock ordering
				1948	* on the list.
				1949	*/
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1950	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
Vlad Apostolov	859d718	2007-10-11 17:44:18 +1000	[diff] [blame]	1951	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1952	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1953	agi = XFS_BUF_TO_AGI(agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	1954
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1955	/*
				1956	* Get the index into the agi hash table for the
				1957	* list this inode will go on.
				1958	*/
				1959	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				1960	ASSERT(agino != 0);
				1961	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
				1962	ASSERT(agi->agi_unlinked[bucket_index]);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	1963	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1964
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	1965	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1966	/*
				1967	* There is already another inode in the bucket we need
				1968	* to add ourselves to. Add us at the front of the list.
				1969	* Here we put the head pointer into our next pointer,
				1970	* and then we fall through to point the head at us.
				1971	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	1972	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				1973	0, 0);
Vlad Apostolov	c319b58	2007-11-23 16:27:51 +1100	[diff] [blame]	1974	if (error)
				1975	return error;
				1976
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	1977	ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1978	dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	1979	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1980	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	1981
				1982	/* need to recalc the inode CRC if appropriate */
				1983	xfs_dinode_calc_crc(mp, dip);
				1984
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1985	xfs_trans_inode_buf(tp, ibp);
				1986	xfs_trans_log_buf(tp, ibp, offset,
				1987	(offset + sizeof(xfs_agino_t) - 1));
				1988	xfs_inobp_check(mp, ibp);
				1989	}
				1990
				1991	/*
				1992	* Point the bucket head pointer at the inode being inserted.
				1993	*/
				1994	ASSERT(agino != 0);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	1995	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1996	offset = offsetof(xfs_agi_t, agi_unlinked) +
				1997	(sizeof(xfs_agino_t) * bucket_index);
				1998	xfs_trans_log_buf(tp, agibp, offset,
				1999	(offset + sizeof(xfs_agino_t) - 1));
				2000	return 0;
				2001	}
				2002
				2003	/*
				2004	* Pull the on-disk inode from the AGI unlinked list.
				2005	*/
				2006	STATIC int
				2007	xfs_iunlink_remove(
				2008	xfs_trans_t *tp,
				2009	xfs_inode_t *ip)
				2010	{
				2011	xfs_ino_t next_ino;
				2012	xfs_mount_t *mp;
				2013	xfs_agi_t *agi;
				2014	xfs_dinode_t *dip;
				2015	xfs_buf_t *agibp;
				2016	xfs_buf_t *ibp;
				2017	xfs_agnumber_t agno;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2018	xfs_agino_t agino;
				2019	xfs_agino_t next_agino;
				2020	xfs_buf_t *last_ibp;
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	2021	xfs_dinode_t *last_dip = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2022	short bucket_index;
Nathan Scott	6fdf8cc	2006-06-28 10:13:52 +1000	[diff] [blame]	2023	int offset, last_offset = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2024	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2025
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2026	mp = tp->t_mountp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2027	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2028
				2029	/*
				2030	* Get the agi buffer first. It ensures lock ordering
				2031	* on the list.
				2032	*/
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2033	error = xfs_read_agi(mp, tp, agno, &agibp);
				2034	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2035	return error;
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2036
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2037	agi = XFS_BUF_TO_AGI(agibp);
Christoph Hellwig	5e1be0f	2008-11-28 14:23:37 +1100	[diff] [blame]	2038
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2039	/*
				2040	* Get the index into the agi hash table for the
				2041	* list this inode will go on.
				2042	*/
				2043	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
				2044	ASSERT(agino != 0);
				2045	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	2046	ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2047	ASSERT(agi->agi_unlinked[bucket_index]);
				2048
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2049	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2050	/*
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2051	* We're at the head of the list. Get the inode's on-disk
				2052	* buffer to see if there is anyone after us on the list.
				2053	* Only modify our next pointer if it is not already NULLAGINO.
				2054	* This saves us the overhead of dealing with the buffer when
				2055	* there is no need to change it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2056	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2057	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				2058	0, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2059	if (error) {
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2060	xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2061	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2062	return error;
				2063	}
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2064	next_agino = be32_to_cpu(dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2065	ASSERT(next_agino != 0);
				2066	if (next_agino != NULLAGINO) {
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2067	dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	2068	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2069	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2070
				2071	/* need to recalc the inode CRC if appropriate */
				2072	xfs_dinode_calc_crc(mp, dip);
				2073
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2074	xfs_trans_inode_buf(tp, ibp);
				2075	xfs_trans_log_buf(tp, ibp, offset,
				2076	(offset + sizeof(xfs_agino_t) - 1));
				2077	xfs_inobp_check(mp, ibp);
				2078	} else {
				2079	xfs_trans_brelse(tp, ibp);
				2080	}
				2081	/*
				2082	* Point the bucket head pointer at the next inode.
				2083	*/
				2084	ASSERT(next_agino != 0);
				2085	ASSERT(next_agino != agino);
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2086	agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2087	offset = offsetof(xfs_agi_t, agi_unlinked) +
				2088	(sizeof(xfs_agino_t) * bucket_index);
				2089	xfs_trans_log_buf(tp, agibp, offset,
				2090	(offset + sizeof(xfs_agino_t) - 1));
				2091	} else {
				2092	/*
				2093	* We need to search the list for the inode being freed.
				2094	*/
Christoph Hellwig	16259e7	2005-11-02 15:11:25 +1100	[diff] [blame]	2095	next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2096	last_ibp = NULL;
				2097	while (next_agino != agino) {
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2098	struct xfs_imap imap;
				2099
				2100	if (last_ibp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2101	xfs_trans_brelse(tp, last_ibp);
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2102
				2103	imap.im_blkno = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2104	next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2105
				2106	error = xfs_imap(mp, tp, next_ino, &imap, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2107	if (error) {
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2108	xfs_warn(mp,
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2109	"%s: xfs_imap returned error %d.",
				2110	__func__, error);
				2111	return error;
				2112	}
				2113
				2114	error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
				2115	&last_ibp, 0, 0);
				2116	if (error) {
				2117	xfs_warn(mp,
				2118	"%s: xfs_imap_to_bp returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2119	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2120	return error;
				2121	}
Christoph Hellwig	129dbc9	2012-07-03 12:21:51 -0400	[diff] [blame]	2122
				2123	last_offset = imap.im_boffset;
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2124	next_agino = be32_to_cpu(last_dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2125	ASSERT(next_agino != NULLAGINO);
				2126	ASSERT(next_agino != 0);
				2127	}
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2128
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2129	/*
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2130	* Now last_ibp points to the buffer previous to us on the
				2131	* unlinked list. Pull us from the list.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2132	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2133	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
				2134	0, 0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2135	if (error) {
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	2136	xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
Dave Chinner	0b932cc	2011-03-07 10:08:35 +1100	[diff] [blame]	2137	__func__, error);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2138	return error;
				2139	}
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2140	next_agino = be32_to_cpu(dip->di_next_unlinked);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2141	ASSERT(next_agino != 0);
				2142	ASSERT(next_agino != agino);
				2143	if (next_agino != NULLAGINO) {
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2144	dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	2145	offset = ip->i_imap.im_boffset +
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2146	offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2147
				2148	/* need to recalc the inode CRC if appropriate */
				2149	xfs_dinode_calc_crc(mp, dip);
				2150
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2151	xfs_trans_inode_buf(tp, ibp);
				2152	xfs_trans_log_buf(tp, ibp, offset,
				2153	(offset + sizeof(xfs_agino_t) - 1));
				2154	xfs_inobp_check(mp, ibp);
				2155	} else {
				2156	xfs_trans_brelse(tp, ibp);
				2157	}
				2158	/*
				2159	* Point the previous inode on the list to the next inode.
				2160	*/
Christoph Hellwig	347d1c0	2007-08-28 13:57:51 +1000	[diff] [blame]	2161	last_dip->di_next_unlinked = cpu_to_be32(next_agino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2162	ASSERT(next_agino != 0);
				2163	offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
Dave Chinner	0a32c26	2013-06-05 12:09:08 +1000	[diff] [blame]	2164
				2165	/* need to recalc the inode CRC if appropriate */
				2166	xfs_dinode_calc_crc(mp, last_dip);
				2167
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2168	xfs_trans_inode_buf(tp, last_ibp);
				2169	xfs_trans_log_buf(tp, last_ibp, offset,
				2170	(offset + sizeof(xfs_agino_t) - 1));
				2171	xfs_inobp_check(mp, last_ibp);
				2172	}
				2173	return 0;
				2174	}
				2175
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2176	/*
Zhi Yong Wu	0b8182d	2013-08-12 03:14:59 +0000	[diff] [blame]	2177	* A big issue when freeing the inode cluster is that we _cannot_ skip any
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2178	* inodes that are in memory - they all must be marked stale and attached to
				2179	* the cluster buffer.
				2180	*/
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2181	STATIC int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2182	xfs_ifree_cluster(
				2183	xfs_inode_t *free_ip,
				2184	xfs_trans_t *tp,
				2185	xfs_ino_t inum)
				2186	{
				2187	xfs_mount_t *mp = free_ip->i_mount;
				2188	int blks_per_cluster;
Jie Liu	982e939	2013-12-13 15:51:49 +1100	[diff] [blame]	2189	int inodes_per_cluster;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2190	int nbufs;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2191	int i, j;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2192	xfs_daddr_t blkno;
				2193	xfs_buf_t *bp;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2194	xfs_inode_t *ip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2195	xfs_inode_log_item_t *iip;
				2196	xfs_log_item_t *lip;
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2197	struct xfs_perag *pag;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2198
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2199	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
Jie Liu	982e939	2013-12-13 15:51:49 +1100	[diff] [blame]	2200	blks_per_cluster = xfs_icluster_size_fsb(mp);
				2201	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
				2202	nbufs = mp->m_ialloc_blks / blks_per_cluster;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2203
Jie Liu	982e939	2013-12-13 15:51:49 +1100	[diff] [blame]	2204	for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2205	blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
				2206	XFS_INO_TO_AGBNO(mp, inum));
				2207
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2208	/*
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2209	* We obtain and lock the backing buffer first in the process
				2210	* here, as we have to ensure that any dirty inode that we
				2211	* can't get the flush lock on is attached to the buffer.
				2212	* If we scan the in-memory inodes first, then buffer IO can
				2213	* complete before we get a lock on it, and hence we may fail
				2214	* to mark all the active inodes on the buffer stale.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2215	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2216	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
Dave Chinner	b6aff29	2012-11-02 11:38:42 +1100	[diff] [blame]	2217	mp->m_bsize * blks_per_cluster,
				2218	XBF_UNMAPPED);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2219
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2220	if (!bp)
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2221	return -ENOMEM;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2222
				2223	/*
				2224	* This buffer may not have been correctly initialised as we
				2225	* didn't read it from disk. That's not important because we are
				2226	* only using to mark the buffer as stale in the log, and to
				2227	* attach stale cached inodes on it. That means it will never be
				2228	* dispatched for IO. If it is, we want to know about it, and we
				2229	* want it to fail. We can acheive this by adding a write
				2230	* verifier to the buffer.
				2231	*/
Dave Chinner	1813dd6	2012-11-14 17:54:40 +1100	[diff] [blame]	2232	bp->b_ops = &xfs_inode_buf_ops;
Dave Chinner	b0f539d	2012-11-14 17:53:49 +1100	[diff] [blame]	2233
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2234	/*
				2235	* Walk the inodes already attached to the buffer and mark them
				2236	* stale. These will all have the flush locks held, so an
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2237	* in-memory inode walk can't lock them. By marking them all
				2238	* stale first, we will not attempt to lock them in the loop
				2239	* below as the XFS_ISTALE flag will be set.
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2240	*/
Christoph Hellwig	adadbee	2011-07-13 13:43:49 +0200	[diff] [blame]	2241	lip = bp->b_fspriv;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2242	while (lip) {
				2243	if (lip->li_type == XFS_LI_INODE) {
				2244	iip = (xfs_inode_log_item_t *)lip;
				2245	ASSERT(iip->ili_logged == 1);
Christoph Hellwig	ca30b2a	2010-06-23 18:11:15 +1000	[diff] [blame]	2246	lip->li_cb = xfs_istale_done;
David Chinner	7b2e2a3	2008-10-30 17:39:12 +1100	[diff] [blame]	2247	xfs_trans_ail_copy_lsn(mp->m_ail,
				2248	&iip->ili_flush_lsn,
				2249	&iip->ili_item.li_lsn);
David Chinner	e5ffd2b	2006-11-21 18:55:33 +1100	[diff] [blame]	2250	xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2251	}
				2252	lip = lip->li_bio_list;
				2253	}
				2254
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2255
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2256	/*
				2257	* For each inode in memory attempt to add it to the inode
				2258	* buffer and set it up for being staled on buffer IO
				2259	* completion. This is safe as we've locked out tail pushing
				2260	* and flushing by locking the buffer.
				2261	*
				2262	* We have already marked every inode that was part of a
				2263	* transaction stale above, which means there is no point in
				2264	* even trying to lock them.
				2265	*/
Jie Liu	982e939	2013-12-13 15:51:49 +1100	[diff] [blame]	2266	for (i = 0; i < inodes_per_cluster; i++) {
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2267	retry:
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2268	rcu_read_lock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2269	ip = radix_tree_lookup(&pag->pag_ici_root,
				2270	XFS_INO_TO_AGINO(mp, (inum + i)));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2271
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2272	/* Inode not in memory, nothing to do */
				2273	if (!ip) {
				2274	rcu_read_unlock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2275	continue;
				2276	}
				2277
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2278	/*
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2279	* because this is an RCU protected lookup, we could
				2280	* find a recently freed or even reallocated inode
				2281	* during the lookup. We need to check under the
				2282	* i_flags_lock for a valid inode here. Skip it if it
				2283	* is not valid, the wrong inode or stale.
				2284	*/
				2285	spin_lock(&ip->i_flags_lock);
				2286	if (ip->i_ino != inum + i \|\|
				2287	__xfs_iflags_test(ip, XFS_ISTALE)) {
				2288	spin_unlock(&ip->i_flags_lock);
				2289	rcu_read_unlock();
				2290	continue;
				2291	}
				2292	spin_unlock(&ip->i_flags_lock);
				2293
				2294	/*
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2295	* Don't try to lock/unlock the current inode, but we
				2296	* _cannot_ skip the other inodes that we did not find
				2297	* in the list attached to the buffer and are not
				2298	* already marked stale. If we can't lock it, back off
				2299	* and retry.
				2300	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2301	if (ip != free_ip &&
				2302	!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2303	rcu_read_unlock();
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2304	delay(1);
				2305	goto retry;
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2306	}
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	2307	rcu_read_unlock();
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2308
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2309	xfs_iflock(ip);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2310	xfs_iflags_set(ip, XFS_ISTALE);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2311
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2312	/*
				2313	* we don't need to attach clean inodes or those only
				2314	* with unlogged changes (which we throw away, anyway).
				2315	*/
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2316	iip = ip->i_itemp;
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2317	if (!iip \|\| xfs_inode_clean(ip)) {
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2318	ASSERT(ip != free_ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2319	xfs_ifunlock(ip);
				2320	xfs_iunlock(ip, XFS_ILOCK_EXCL);
				2321	continue;
				2322	}
				2323
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	2324	iip->ili_last_fields = iip->ili_fields;
				2325	iip->ili_fields = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2326	iip->ili_logged = 1;
David Chinner	7b2e2a3	2008-10-30 17:39:12 +1100	[diff] [blame]	2327	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				2328	&iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2329
Christoph Hellwig	ca30b2a	2010-06-23 18:11:15 +1000	[diff] [blame]	2330	xfs_buf_attach_iodone(bp, xfs_istale_done,
				2331	&iip->ili_item);
Dave Chinner	5b257b4	2010-06-03 16:22:29 +1000	[diff] [blame]	2332
				2333	if (ip != free_ip)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2334	xfs_iunlock(ip, XFS_ILOCK_EXCL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2335	}
				2336
Dave Chinner	5b3eed7	2010-08-24 11:42:41 +1000	[diff] [blame]	2337	xfs_trans_stale_inode_buf(tp, bp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2338	xfs_trans_binval(tp, bp);
				2339	}
				2340
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	2341	xfs_perag_put(pag);
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2342	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2343	}
				2344
				2345	/*
				2346	* This is called to return an inode to the inode free list.
				2347	* The inode should already be truncated to 0 length and have
				2348	* no pages associated with it. This routine also assumes that
				2349	* the inode is already a part of the transaction.
				2350	*
				2351	* The on-disk copy of the inode will have been added to the list
				2352	* of unlinked inodes in the AGI. We need to remove the inode from
				2353	* that list atomically with respect to freeing it here.
				2354	*/
				2355	int
				2356	xfs_ifree(
				2357	xfs_trans_t *tp,
				2358	xfs_inode_t *ip,
				2359	xfs_bmap_free_t *flist)
				2360	{
				2361	int error;
				2362	int delete;
				2363	xfs_ino_t first_ino;
				2364
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2365	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2366	ASSERT(ip->i_d.di_nlink == 0);
				2367	ASSERT(ip->i_d.di_nextents == 0);
				2368	ASSERT(ip->i_d.di_anextents == 0);
Christoph Hellwig	ce7ae151	2011-12-18 20:00:11 +0000	[diff] [blame]	2369	ASSERT(ip->i_d.di_size == 0 \|\| !S_ISREG(ip->i_d.di_mode));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2370	ASSERT(ip->i_d.di_nblocks == 0);
				2371
				2372	/*
				2373	* Pull the on-disk inode from the AGI unlinked list.
				2374	*/
				2375	error = xfs_iunlink_remove(tp, ip);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2376	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2377	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2378
				2379	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2380	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2381	return error;
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2382
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2383	ip->i_d.di_mode = 0; /* mark incore inode as free */
				2384	ip->i_d.di_flags = 0;
				2385	ip->i_d.di_dmevmask = 0;
				2386	ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2387	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
				2388	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
				2389	/*
				2390	* Bump the generation count so no one will be confused
				2391	* by reincarnations of this inode.
				2392	*/
				2393	ip->i_d.di_gen++;
				2394	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				2395
Dave Chinner	1baaed8	2013-06-27 16:04:50 +1000	[diff] [blame]	2396	if (delete)
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2397	error = xfs_ifree_cluster(ip, tp, first_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2398
Chandra Seetharaman	2a30f36d	2011-09-20 13:56:55 +0000	[diff] [blame]	2399	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2400	}
				2401
				2402	/*
Christoph Hellwig	60ec6783	2010-02-17 19:43:56 +0000	[diff] [blame]	2403	* This is called to unpin an inode. The caller must have the inode locked
				2404	* in at least shared mode so that the buffer cannot be subsequently pinned
				2405	* once someone is waiting for it to be unpinned.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2406	*/
Christoph Hellwig	60ec6783	2010-02-17 19:43:56 +0000	[diff] [blame]	2407	static void
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2408	xfs_iunpin(
Christoph Hellwig	60ec6783	2010-02-17 19:43:56 +0000	[diff] [blame]	2409	struct xfs_inode *ip)
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2410	{
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	2411	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2412
Dave Chinner	4aaf15d	2010-03-08 11:24:07 +1100	[diff] [blame]	2413	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
				2414
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2415	/* Give the log a push to start the unpinning I/O */
Christoph Hellwig	60ec6783	2010-02-17 19:43:56 +0000	[diff] [blame]	2416	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
Christoph Hellwig	a14a348	2010-01-19 09:56:46 +0000	[diff] [blame]	2417
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	2418	}
				2419
Christoph Hellwig	f392e63	2011-12-18 20:00:10 +0000	[diff] [blame]	2420	static void
				2421	__xfs_iunpin_wait(
				2422	struct xfs_inode *ip)
				2423	{
				2424	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
				2425	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
				2426
				2427	xfs_iunpin(ip);
				2428
				2429	do {
				2430	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
				2431	if (xfs_ipincount(ip))
				2432	io_schedule();
				2433	} while (xfs_ipincount(ip));
				2434	finish_wait(wq, &wait.wait);
				2435	}
				2436
/*
 * Wait for the inode to become unpinned.  Returns immediately when the pin
 * count is already zero.
 */
void
xfs_iunpin_wait(
	struct xfs_inode	*ip)
{
	if (!xfs_ipincount(ip))
		return;

	__xfs_iunpin_wait(ip);
}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2444
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2445	/*
				2446	* Removing an inode from the namespace involves removing the directory entry
				2447	* and dropping the link count on the inode. Removing the directory entry can
				2448	* result in locking an AGF (directory blocks were freed) and removing a link
				2449	* count can result in placing the inode on an unlinked list which results in
				2450	* locking an AGI.
				2451	*
				2452	* The big problem here is that we have an ordering constraint on AGF and AGI
				2453	* locking - inode allocation locks the AGI, then can allocate a new extent for
				2454	* new inodes, locking the AGF after the AGI. Similarly, freeing the inode
				2455	* removes the inode from the unlinked list, requiring that we lock the AGI
				2456	* first, and then freeing the inode can result in an inode chunk being freed
				2457	* and hence freeing disk space requiring that we lock an AGF.
				2458	*
				2459	* Hence the ordering that is imposed by other parts of the code is AGI before
				2460	* AGF. This means we cannot remove the directory entry before we drop the inode
				2461	* reference count and put it on the unlinked list as this results in a lock
				2462	* order of AGF then AGI, and this can deadlock against inode allocation and
				2463	* freeing. Therefore we must drop the link counts before we remove the
				2464	* directory entry.
				2465	*
				2466	* This is still safe from a transactional point of view - it is not until we
				2467	* get to xfs_bmap_finish() that we have the possibility of multiple
				2468	* transactions in this operation. Hence as long as we remove the directory
				2469	* entry and drop the link count in the first transaction of the remove
				2470	* operation, there are no transactional constraints on the ordering here.
				2471	*/
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2472	int
				2473	xfs_remove(
				2474	xfs_inode_t *dp,
				2475	struct xfs_name *name,
				2476	xfs_inode_t *ip)
				2477	{
				2478	xfs_mount_t *mp = dp->i_mount;
				2479	xfs_trans_t *tp = NULL;
				2480	int is_dir = S_ISDIR(ip->i_d.di_mode);
				2481	int error = 0;
				2482	xfs_bmap_free_t free_list;
				2483	xfs_fsblock_t first_block;
				2484	int cancel_flags;
				2485	int committed;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2486	uint resblks;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2487
				2488	trace_xfs_remove(dp, name);
				2489
				2490	if (XFS_FORCED_SHUTDOWN(mp))
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2491	return -EIO;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2492
				2493	error = xfs_qm_dqattach(dp, 0);
				2494	if (error)
				2495	goto std_return;
				2496
				2497	error = xfs_qm_dqattach(ip, 0);
				2498	if (error)
				2499	goto std_return;
				2500
Dave Chinner	32296f8	2014-12-04 09:43:17 +1100	[diff] [blame]	2501	if (is_dir)
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2502	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
Dave Chinner	32296f8	2014-12-04 09:43:17 +1100	[diff] [blame]	2503	else
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2504	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2505	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2506
				2507	/*
				2508	* We try to get the real space reservation first,
				2509	* allowing for directory btree deletion(s) implying
				2510	* possible bmap insert(s). If we can't get the space
				2511	* reservation then we use 0 instead, and avoid the bmap
				2512	* btree insert(s) in the directory code by, if the bmap
				2513	* insert tries to happen, instead trimming the LAST
				2514	* block from the directory.
				2515	*/
				2516	resblks = XFS_REMOVE_SPACE_RES(mp);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2517	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2518	if (error == -ENOSPC) {
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2519	resblks = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2520	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2521	}
				2522	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2523	ASSERT(error != -ENOSPC);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2524	cancel_flags = 0;
				2525	goto out_trans_cancel;
				2526	}
				2527
				2528	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
				2529
				2530	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				2531	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
				2532
				2533	/*
				2534	* If we're removing a directory perform some additional validation.
				2535	*/
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2536	cancel_flags \|= XFS_TRANS_ABORT;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2537	if (is_dir) {
				2538	ASSERT(ip->i_d.di_nlink >= 2);
				2539	if (ip->i_d.di_nlink != 2) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2540	error = -ENOTEMPTY;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2541	goto out_trans_cancel;
				2542	}
				2543	if (!xfs_dir_isempty(ip)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2544	error = -ENOTEMPTY;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2545	goto out_trans_cancel;
				2546	}
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2547
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2548	/* Drop the link from ip's "..". */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2549	error = xfs_droplink(tp, dp);
				2550	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2551	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2552
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2553	/* Drop the "." link from ip to self. */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2554	error = xfs_droplink(tp, ip);
				2555	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2556	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2557	} else {
				2558	/*
				2559	* When removing a non-directory we need to log the parent
				2560	* inode here. For a directory this is done implicitly
				2561	* by the xfs_droplink call for the ".." entry.
				2562	*/
				2563	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				2564	}
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2565	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2566
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2567	/* Drop the link from dp to ip. */
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2568	error = xfs_droplink(tp, ip);
				2569	if (error)
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2570	goto out_trans_cancel;
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2571
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2572	xfs_bmap_init(&free_list, &first_block);
				2573	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
				2574	&first_block, &free_list, resblks);
				2575	if (error) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2576	ASSERT(error != -ENOENT);
Dave Chinner	2732036	2013-10-29 22:11:44 +1100	[diff] [blame]	2577	goto out_bmap_cancel;
				2578	}
				2579
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2580	/*
				2581	* If this is a synchronous mount, make sure that the
				2582	* remove transaction goes to disk before returning to
				2583	* the user.
				2584	*/
				2585	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC))
				2586	xfs_trans_set_sync(tp);
				2587
				2588	error = xfs_bmap_finish(&tp, &free_list, &committed);
				2589	if (error)
				2590	goto out_bmap_cancel;
				2591
				2592	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				2593	if (error)
				2594	goto std_return;
				2595
Christoph Hellwig	2cd2ef6	2014-04-23 07:11:51 +1000	[diff] [blame]	2596	if (is_dir && xfs_inode_is_filestream(ip))
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2597	xfs_filestream_deassociate(ip);
				2598
				2599	return 0;
				2600
				2601	out_bmap_cancel:
				2602	xfs_bmap_cancel(&free_list);
Dave Chinner	c24b5df	2013-08-12 20:49:45 +1000	[diff] [blame]	2603	out_trans_cancel:
				2604	xfs_trans_cancel(tp, cancel_flags);
				2605	std_return:
				2606	return error;
				2607	}
				2608
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2609	/*
				2610	* Enter all inodes for a rename transaction into a sorted array.
				2611	*/
				2612	STATIC void
				2613	xfs_sort_for_rename(
				2614	xfs_inode_t dp1, / in: old (source) directory inode */
				2615	xfs_inode_t dp2, / in: new (target) directory inode */
				2616	xfs_inode_t ip1, / in: inode of old entry */
				2617	xfs_inode_t ip2, / in: inode of new entry, if it
				2618	already exists, NULL otherwise. */
				2619	xfs_inode_t *i_tab,/ out: array of inode returned, sorted */
				2620	int num_inodes) / out: number of inodes in array */
				2621	{
				2622	xfs_inode_t *temp;
				2623	int i, j;
				2624
				2625	/*
				2626	* i_tab contains a list of pointers to inodes. We initialize
				2627	* the table here & we'll sort it. We will then use it to
				2628	* order the acquisition of the inode locks.
				2629	*
				2630	* Note that the table may contain duplicates. e.g., dp1 == dp2.
				2631	*/
				2632	i_tab[0] = dp1;
				2633	i_tab[1] = dp2;
				2634	i_tab[2] = ip1;
				2635	if (ip2) {
				2636	*num_inodes = 4;
				2637	i_tab[3] = ip2;
				2638	} else {
				2639	*num_inodes = 3;
				2640	i_tab[3] = NULL;
				2641	}
				2642
				2643	/*
				2644	* Sort the elements via bubble sort. (Remember, there are at
				2645	* most 4 elements to sort, so this is adequate.)
				2646	*/
				2647	for (i = 0; i < *num_inodes; i++) {
				2648	for (j = 1; j < *num_inodes; j++) {
				2649	if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
				2650	temp = i_tab[j];
				2651	i_tab[j] = i_tab[j-1];
				2652	i_tab[j-1] = temp;
				2653	}
				2654	}
				2655	}
				2656	}
				2657
				2658	/*
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame^]	2659	* xfs_cross_rename()
				2660	*
				2661	* responsible for handling RENAME_EXCHANGE flag in renameat2() sytemcall
				2662	*/
				2663	STATIC int
				2664	xfs_cross_rename(
				2665	struct xfs_trans *tp,
				2666	struct xfs_inode *dp1,
				2667	struct xfs_name *name1,
				2668	struct xfs_inode *ip1,
				2669	struct xfs_inode *dp2,
				2670	struct xfs_name *name2,
				2671	struct xfs_inode *ip2,
				2672	struct xfs_bmap_free *free_list,
				2673	xfs_fsblock_t *first_block,
				2674	int spaceres)
				2675	{
				2676	int error = 0;
				2677	int ip1_flags = 0;
				2678	int ip2_flags = 0;
				2679	int dp2_flags = 0;
				2680
				2681	/* Swap inode number for dirent in first parent */
				2682	error = xfs_dir_replace(tp, dp1, name1,
				2683	ip2->i_ino,
				2684	first_block, free_list, spaceres);
				2685	if (error)
				2686	goto out;
				2687
				2688	/* Swap inode number for dirent in second parent */
				2689	error = xfs_dir_replace(tp, dp2, name2,
				2690	ip1->i_ino,
				2691	first_block, free_list, spaceres);
				2692	if (error)
				2693	goto out;
				2694
				2695	/*
				2696	* If we're renaming one or more directories across different parents,
				2697	* update the respective ".." entries (and link counts) to match the new
				2698	* parents.
				2699	*/
				2700	if (dp1 != dp2) {
				2701	dp2_flags = XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				2702
				2703	if (S_ISDIR(ip2->i_d.di_mode)) {
				2704	error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
				2705	dp1->i_ino, first_block,
				2706	free_list, spaceres);
				2707	if (error)
				2708	goto out;
				2709
				2710	/* transfer ip2 ".." reference to dp1 */
				2711	if (!S_ISDIR(ip1->i_d.di_mode)) {
				2712	error = xfs_droplink(tp, dp2);
				2713	if (error)
				2714	goto out;
				2715	error = xfs_bumplink(tp, dp1);
				2716	if (error)
				2717	goto out;
				2718	}
				2719
				2720	/*
				2721	* Although ip1 isn't changed here, userspace needs
				2722	* to be warned about the change, so that applications
				2723	* relying on it (like backup ones), will properly
				2724	* notify the change
				2725	*/
				2726	ip1_flags \|= XFS_ICHGTIME_CHG;
				2727	ip2_flags \|= XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				2728	}
				2729
				2730	if (S_ISDIR(ip1->i_d.di_mode)) {
				2731	error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
				2732	dp2->i_ino, first_block,
				2733	free_list, spaceres);
				2734	if (error)
				2735	goto out;
				2736
				2737	/* transfer ip1 ".." reference to dp2 */
				2738	if (!S_ISDIR(ip2->i_d.di_mode)) {
				2739	error = xfs_droplink(tp, dp1);
				2740	if (error)
				2741	goto out;
				2742	error = xfs_bumplink(tp, dp2);
				2743	if (error)
				2744	goto out;
				2745	}
				2746
				2747	/*
				2748	* Although ip2 isn't changed here, userspace needs
				2749	* to be warned about the change, so that applications
				2750	* relying on it (like backup ones), will properly
				2751	* notify the change
				2752	*/
				2753	ip1_flags \|= XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG;
				2754	ip2_flags \|= XFS_ICHGTIME_CHG;
				2755	}
				2756	}
				2757
				2758	if (ip1_flags) {
				2759	xfs_trans_ichgtime(tp, ip1, ip1_flags);
				2760	xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
				2761	}
				2762	if (ip2_flags) {
				2763	xfs_trans_ichgtime(tp, ip2, ip2_flags);
				2764	xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE);
				2765	}
				2766	if (dp2_flags) {
				2767	xfs_trans_ichgtime(tp, dp2, dp2_flags);
				2768	xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE);
				2769	}
				2770	xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2771	xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
				2772	out:
				2773	return error;
				2774	}
				2775
				2776	/*
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2777	* xfs_rename
				2778	*/
				2779	int
				2780	xfs_rename(
				2781	xfs_inode_t *src_dp,
				2782	struct xfs_name *src_name,
				2783	xfs_inode_t *src_ip,
				2784	xfs_inode_t *target_dp,
				2785	struct xfs_name *target_name,
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame^]	2786	xfs_inode_t *target_ip,
				2787	unsigned int flags)
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2788	{
				2789	xfs_trans_t *tp = NULL;
				2790	xfs_mount_t *mp = src_dp->i_mount;
				2791	int new_parent; /* moving to a new dir */
				2792	int src_is_directory; /* src_name is a directory */
				2793	int error;
				2794	xfs_bmap_free_t free_list;
				2795	xfs_fsblock_t first_block;
				2796	int cancel_flags;
				2797	int committed;
				2798	xfs_inode_t *inodes[4];
				2799	int spaceres;
				2800	int num_inodes;
				2801
				2802	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
				2803
				2804	new_parent = (src_dp != target_dp);
				2805	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
				2806
				2807	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
				2808	inodes, &num_inodes);
				2809
				2810	xfs_bmap_init(&free_list, &first_block);
				2811	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
				2812	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				2813	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2814	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2815	if (error == -ENOSPC) {
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2816	spaceres = 0;
Jie Liu	3d3c8b5	2013-08-12 20:49:59 +1000	[diff] [blame]	2817	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2818	}
				2819	if (error) {
				2820	xfs_trans_cancel(tp, 0);
				2821	goto std_return;
				2822	}
				2823
				2824	/*
				2825	* Attach the dquots to the inodes
				2826	*/
				2827	error = xfs_qm_vop_rename_dqattach(inodes);
				2828	if (error) {
				2829	xfs_trans_cancel(tp, cancel_flags);
				2830	goto std_return;
				2831	}
				2832
				2833	/*
				2834	* Lock all the participating inodes. Depending upon whether
				2835	* the target_name exists in the target directory, and
				2836	* whether the target directory is the same as the source
				2837	* directory, we can lock from 2 to 4 inodes.
				2838	*/
				2839	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
				2840
				2841	/*
				2842	* Join all the inodes to the transaction. From this point on,
				2843	* we can rely on either trans_commit or trans_cancel to unlock
				2844	* them.
				2845	*/
				2846	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
				2847	if (new_parent)
				2848	xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
				2849	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
				2850	if (target_ip)
				2851	xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
				2852
				2853	/*
				2854	* If we are using project inheritance, we only allow renames
				2855	* into our tree when the project IDs are the same; else the
				2856	* tree quota mechanism would be circumvented.
				2857	*/
				2858	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
				2859	(xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2860	error = -EXDEV;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2861	goto error_return;
				2862	}
				2863
				2864	/*
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame^]	2865	* Handle RENAME_EXCHANGE flags
				2866	*/
				2867	if (flags & RENAME_EXCHANGE) {
				2868	error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
				2869	target_dp, target_name, target_ip,
				2870	&free_list, &first_block, spaceres);
				2871	if (error)
				2872	goto abort_return;
				2873	goto finish_rename;
				2874	}
				2875
				2876	/*
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2877	* Set up the target.
				2878	*/
				2879	if (target_ip == NULL) {
				2880	/*
				2881	* If there's no space reservation, check the entry will
				2882	* fit before actually inserting it.
				2883	*/
Eric Sandeen	94f3cad	2014-09-09 11:57:52 +1000	[diff] [blame]	2884	if (!spaceres) {
				2885	error = xfs_dir_canenter(tp, target_dp, target_name);
				2886	if (error)
				2887	goto error_return;
				2888	}
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2889	/*
				2890	* If target does not exist and the rename crosses
				2891	* directories, adjust the target directory link count
				2892	* to account for the ".." reference from the new entry.
				2893	*/
				2894	error = xfs_dir_createname(tp, target_dp, target_name,
				2895	src_ip->i_ino, &first_block,
				2896	&free_list, spaceres);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2897	if (error == -ENOSPC)
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2898	goto error_return;
				2899	if (error)
				2900	goto abort_return;
				2901
				2902	xfs_trans_ichgtime(tp, target_dp,
				2903	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2904
				2905	if (new_parent && src_is_directory) {
				2906	error = xfs_bumplink(tp, target_dp);
				2907	if (error)
				2908	goto abort_return;
				2909	}
				2910	} else { /* target_ip != NULL */
				2911	/*
				2912	* If target exists and it's a directory, check that both
				2913	* target and source are directories and that target can be
				2914	* destroyed, or that neither is a directory.
				2915	*/
				2916	if (S_ISDIR(target_ip->i_d.di_mode)) {
				2917	/*
				2918	* Make sure target dir is empty.
				2919	*/
				2920	if (!(xfs_dir_isempty(target_ip)) \|\|
				2921	(target_ip->i_d.di_nlink > 2)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2922	error = -EEXIST;
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2923	goto error_return;
				2924	}
				2925	}
				2926
				2927	/*
				2928	* Link the source inode under the target name.
				2929	* If the source inode is a directory and we are moving
				2930	* it across directories, its ".." entry will be
				2931	* inconsistent until we replace that down below.
				2932	*
				2933	* In case there is already an entry with the same
				2934	* name at the destination directory, remove it first.
				2935	*/
				2936	error = xfs_dir_replace(tp, target_dp, target_name,
				2937	src_ip->i_ino,
				2938	&first_block, &free_list, spaceres);
				2939	if (error)
				2940	goto abort_return;
				2941
				2942	xfs_trans_ichgtime(tp, target_dp,
				2943	XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				2944
				2945	/*
				2946	* Decrement the link count on the target since the target
				2947	* dir no longer points to it.
				2948	*/
				2949	error = xfs_droplink(tp, target_ip);
				2950	if (error)
				2951	goto abort_return;
				2952
				2953	if (src_is_directory) {
				2954	/*
				2955	* Drop the link from the old "." entry.
				2956	*/
				2957	error = xfs_droplink(tp, target_ip);
				2958	if (error)
				2959	goto abort_return;
				2960	}
				2961	} /* target_ip != NULL */
				2962
				2963	/*
				2964	* Remove the source.
				2965	*/
				2966	if (new_parent && src_is_directory) {
				2967	/*
				2968	* Rewrite the ".." entry to point to the new
				2969	* directory.
				2970	*/
				2971	error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
				2972	target_dp->i_ino,
				2973	&first_block, &free_list, spaceres);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	2974	ASSERT(error != -EEXIST);
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	2975	if (error)
				2976	goto abort_return;
				2977	}
				2978
				2979	/*
				2980	* We always want to hit the ctime on the source inode.
				2981	*
				2982	* This isn't strictly required by the standards since the source
				2983	* inode isn't really being changed, but old unix file systems did
				2984	* it and some incremental backup programs won't work without it.
				2985	*/
				2986	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
				2987	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
				2988
				2989	/*
				2990	* Adjust the link count on src_dp. This is necessary when
				2991	* renaming a directory, either within one parent when
				2992	* the target existed, or across two parent directories.
				2993	*/
				2994	if (src_is_directory && (new_parent \|\| target_ip != NULL)) {
				2995
				2996	/*
				2997	* Decrement link count on src_directory since the
				2998	* entry that's moved no longer points to it.
				2999	*/
				3000	error = xfs_droplink(tp, src_dp);
				3001	if (error)
				3002	goto abort_return;
				3003	}
				3004
				3005	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
				3006	&first_block, &free_list, spaceres);
				3007	if (error)
				3008	goto abort_return;
				3009
				3010	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD \| XFS_ICHGTIME_CHG);
				3011	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
				3012	if (new_parent)
				3013	xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
				3014
Carlos Maiolino	d31a182	2014-12-24 08:51:42 +1100	[diff] [blame^]	3015	finish_rename:
Dave Chinner	f6bba20	2013-08-12 20:49:46 +1000	[diff] [blame]	3016	/*
				3017	* If this is a synchronous mount, make sure that the
				3018	* rename transaction goes to disk before returning to
				3019	* the user.
				3020	*/
				3021	if (mp->m_flags & (XFS_MOUNT_WSYNC\|XFS_MOUNT_DIRSYNC)) {
				3022	xfs_trans_set_sync(tp);
				3023	}
				3024
				3025	error = xfs_bmap_finish(&tp, &free_list, &committed);
				3026	if (error) {
				3027	xfs_bmap_cancel(&free_list);
				3028	xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES \|
				3029	XFS_TRANS_ABORT));
				3030	goto std_return;
				3031	}
				3032
				3033	/*
				3034	* trans_commit will unlock src_ip, target_ip & decrement
				3035	* the vnode references.
				3036	*/
				3037	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				3038
				3039	abort_return:
				3040	cancel_flags \|= XFS_TRANS_ABORT;
				3041	error_return:
				3042	xfs_bmap_cancel(&free_list);
				3043	xfs_trans_cancel(tp, cancel_flags);
				3044	std_return:
				3045	return error;
				3046	}
				3047
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3048	STATIC int
				3049	xfs_iflush_cluster(
				3050	xfs_inode_t *ip,
				3051	xfs_buf_t *bp)
				3052	{
				3053	xfs_mount_t *mp = ip->i_mount;
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	3054	struct xfs_perag *pag;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3055	unsigned long first_index, mask;
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	3056	unsigned long inodes_per_cluster;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3057	int ilist_size;
				3058	xfs_inode_t **ilist;
				3059	xfs_inode_t *iq;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3060	int nr_found;
				3061	int clcount = 0;
				3062	int bufwasdelwri;
				3063	int i;
				3064
Dave Chinner	5017e97	2010-01-11 11:47:40 +0000	[diff] [blame]	3065	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3066
Jie Liu	0f49efd	2013-12-13 15:51:48 +1100	[diff] [blame]	3067	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	3068	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
David Chinner	49383b0	2008-05-19 16:29:34 +1000	[diff] [blame]	3069	ilist = kmem_alloc(ilist_size, KM_MAYFAIL\|KM_NOFS);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3070	if (!ilist)
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	3071	goto out_put;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3072
Jie Liu	0f49efd	2013-12-13 15:51:48 +1100	[diff] [blame]	3073	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3074	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	3075	rcu_read_lock();
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3076	/* really need a gang lookup range call here */
				3077	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
David Chinner	c8f5f12	2008-05-20 11:30:15 +1000	[diff] [blame]	3078	first_index, inodes_per_cluster);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3079	if (nr_found == 0)
				3080	goto out_free;
				3081
				3082	for (i = 0; i < nr_found; i++) {
				3083	iq = ilist[i];
				3084	if (iq == ip)
				3085	continue;
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	3086
				3087	/*
				3088	* because this is an RCU protected lookup, we could find a
				3089	* recently freed or even reallocated inode during the lookup.
				3090	* We need to check under the i_flags_lock for a valid inode
				3091	* here. Skip it if it is not valid or the wrong inode.
				3092	*/
				3093	spin_lock(&ip->i_flags_lock);
				3094	if (!ip->i_ino \|\|
				3095	(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
				3096	spin_unlock(&ip->i_flags_lock);
				3097	continue;
				3098	}
				3099	spin_unlock(&ip->i_flags_lock);
				3100
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3101	/*
				3102	* Do an un-protected check to see if the inode is dirty and
				3103	* is a candidate for flushing. These checks will be repeated
				3104	* later after the appropriate locks are acquired.
				3105	*/
David Chinner	3354040	2008-03-06 13:43:59 +1100	[diff] [blame]	3106	if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3107	continue;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3108
				3109	/*
				3110	* Try to get locks. If any are unavailable or it is pinned,
				3111	* then this inode cannot be flushed and is skipped.
				3112	*/
				3113
				3114	if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
				3115	continue;
				3116	if (!xfs_iflock_nowait(iq)) {
				3117	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				3118	continue;
				3119	}
				3120	if (xfs_ipincount(iq)) {
				3121	xfs_ifunlock(iq);
				3122	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				3123	continue;
				3124	}
				3125
				3126	/*
				3127	* arriving here means that this inode can be flushed. First
				3128	* re-check that it's dirty before flushing.
				3129	*/
David Chinner	3354040	2008-03-06 13:43:59 +1100	[diff] [blame]	3130	if (!xfs_inode_clean(iq)) {
				3131	int error;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3132	error = xfs_iflush_int(iq, bp);
				3133	if (error) {
				3134	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				3135	goto cluster_corrupt_out;
				3136	}
				3137	clcount++;
				3138	} else {
				3139	xfs_ifunlock(iq);
				3140	}
				3141	xfs_iunlock(iq, XFS_ILOCK_SHARED);
				3142	}
				3143
				3144	if (clcount) {
				3145	XFS_STATS_INC(xs_icluster_flushcnt);
				3146	XFS_STATS_ADD(xs_icluster_flushinode, clcount);
				3147	}
				3148
				3149	out_free:
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	3150	rcu_read_unlock();
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	3151	kmem_free(ilist);
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	3152	out_put:
				3153	xfs_perag_put(pag);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3154	return 0;
				3155
				3156
				3157	cluster_corrupt_out:
				3158	/*
				3159	* Corruption detected in the clustering loop. Invalidate the
				3160	* inode buffer and shut down the filesystem.
				3161	*/
Dave Chinner	1a3e8f3	2010-12-17 17:29:43 +1100	[diff] [blame]	3162	rcu_read_unlock();
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3163	/*
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	3164	* Clean up the buffer. If it was delwri, just release it --
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3165	* brelse can handle it with no problems. If not, shut down the
				3166	* filesystem before releasing the buffer.
				3167	*/
Christoph Hellwig	43ff212	2012-04-23 15:58:39 +1000	[diff] [blame]	3168	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3169	if (bufwasdelwri)
				3170	xfs_buf_relse(bp);
				3171
				3172	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
				3173
				3174	if (!bufwasdelwri) {
				3175	/*
				3176	* Just like incore_relse: if we have b_iodone functions,
				3177	* mark the buffer as an error and call them. Otherwise
				3178	* mark it as stale and brelse.
				3179	*/
Christoph Hellwig	cb669ca	2011-07-13 13:43:49 +0200	[diff] [blame]	3180	if (bp->b_iodone) {
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3181	XFS_BUF_UNDONE(bp);
Christoph Hellwig	c867cb6	2011-10-10 16:52:46 +0000	[diff] [blame]	3182	xfs_buf_stale(bp);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3183	xfs_buf_ioerror(bp, -EIO);
Dave Chinner	e8aaba9	2014-10-02 09:04:22 +1000	[diff] [blame]	3184	xfs_buf_ioend(bp);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3185	} else {
Christoph Hellwig	c867cb6	2011-10-10 16:52:46 +0000	[diff] [blame]	3186	xfs_buf_stale(bp);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3187	xfs_buf_relse(bp);
				3188	}
				3189	}
				3190
				3191	/*
				3192	* Unlocks the flush lock
				3193	*/
Dave Chinner	04913fd	2012-04-23 15:58:41 +1000	[diff] [blame]	3194	xfs_iflush_abort(iq, false);
Denys Vlasenko	f0e2d93	2008-05-19 16:31:57 +1000	[diff] [blame]	3195	kmem_free(ilist);
Dave Chinner	44b56e0	2010-01-11 11:47:43 +0000	[diff] [blame]	3196	xfs_perag_put(pag);
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3197	return -EFSCORRUPTED;
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3198	}
				3199
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3200	/*
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3201	* Flush dirty inode metadata into the backing buffer.
				3202	*
				3203	* The caller must have the inode lock and the inode flush lock held. The
				3204	* inode lock will still be held upon return to the caller, and the inode
				3205	* flush lock will be released after the inode has reached the disk.
				3206	*
				3207	* The caller must write out the buffer returned in *bpp and release it.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3208	*/
				3209	int
				3210	xfs_iflush(
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3211	struct xfs_inode *ip,
				3212	struct xfs_buf **bpp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3213	{
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3214	struct xfs_mount *mp = ip->i_mount;
				3215	struct xfs_buf *bp;
				3216	struct xfs_dinode *dip;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3217	int error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3218
				3219	XFS_STATS_INC(xs_iflush_count);
				3220
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	3221	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
Christoph Hellwig	474fce0	2011-12-18 20:00:09 +0000	[diff] [blame]	3222	ASSERT(xfs_isiflocked(ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3223	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE \|\|
Christoph Hellwig	8096b1e	2011-12-18 20:00:07 +0000	[diff] [blame]	3224	ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3225
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3226	*bpp = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3227
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3228	xfs_iunpin_wait(ip);
				3229
				3230	/*
Dave Chinner	4b6a468	2010-01-11 11:45:21 +0000	[diff] [blame]	3231	* For stale inodes we cannot rely on the backing buffer remaining
				3232	* stale in cache for the remaining life of the stale inode and so
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	3233	* xfs_imap_to_bp() below may give us a buffer that no longer contains
Dave Chinner	4b6a468	2010-01-11 11:45:21 +0000	[diff] [blame]	3234	* inodes below. We have to check this after ensuring the inode is
				3235	* unpinned so that it is safe to reclaim the stale inode after the
				3236	* flush call.
				3237	*/
				3238	if (xfs_iflags_test(ip, XFS_ISTALE)) {
				3239	xfs_ifunlock(ip);
				3240	return 0;
				3241	}
				3242
				3243	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3244	* This may have been unpinned because the filesystem is shutting
				3245	* down forcibly. If that's the case we must not write this inode
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3246	* to disk, because the log record didn't make it to disk.
				3247	*
				3248	* We also have to remove the log item from the AIL in this case,
				3249	* as we wait for an empty AIL as part of the unmount process.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3250	*/
				3251	if (XFS_FORCED_SHUTDOWN(mp)) {
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3252	error = -EIO;
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3253	goto abort_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3254	}
				3255
				3256	/*
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3257	* Get the buffer containing the on-disk inode.
				3258	*/
Christoph Hellwig	475ee41	2012-07-03 12:21:22 -0400	[diff] [blame]	3259	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
				3260	0);
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3261	if (error \|\| !bp) {
				3262	xfs_ifunlock(ip);
				3263	return error;
				3264	}
				3265
				3266	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3267	* First flush out the inode that xfs_iflush was called with.
				3268	*/
				3269	error = xfs_iflush_int(ip, bp);
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3270	if (error)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3271	goto corrupt_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3272
				3273	/*
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3274	* If the buffer is pinned then push on the log now so we won't
				3275	* get stuck waiting in the write for too long.
				3276	*/
Chandra Seetharaman	811e64c	2011-07-22 23:40:27 +0000	[diff] [blame]	3277	if (xfs_buf_ispinned(bp))
Christoph Hellwig	a14a348	2010-01-19 09:56:46 +0000	[diff] [blame]	3278	xfs_log_force(mp, 0);
David Chinner	a3f74ff	2008-03-06 13:43:42 +1100	[diff] [blame]	3279
				3280	/*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3281	* inode clustering:
				3282	* see if other inodes can be gathered into this write
				3283	*/
David Chinner	bad5584	2008-03-06 13:43:49 +1100	[diff] [blame]	3284	error = xfs_iflush_cluster(ip, bp);
				3285	if (error)
				3286	goto cluster_corrupt_out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3287
Christoph Hellwig	4c46819	2012-04-23 15:58:36 +1000	[diff] [blame]	3288	*bpp = bp;
				3289	return 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3290
				3291	corrupt_out:
				3292	xfs_buf_relse(bp);
Nathan Scott	7d04a33	2006-06-09 14:58:38 +1000	[diff] [blame]	3293	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3294	cluster_corrupt_out:
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3295	error = -EFSCORRUPTED;
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3296	abort_out:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3297	/*
				3298	* Unlocks the flush lock
				3299	*/
Dave Chinner	04913fd	2012-04-23 15:58:41 +1000	[diff] [blame]	3300	xfs_iflush_abort(ip, false);
Christoph Hellwig	32ce90a	2012-04-23 15:58:32 +1000	[diff] [blame]	3301	return error;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3302	}
				3303
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3304	STATIC int
				3305	xfs_iflush_int(
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3306	struct xfs_inode *ip,
				3307	struct xfs_buf *bp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3308	{
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3309	struct xfs_inode_log_item *iip = ip->i_itemp;
				3310	struct xfs_dinode *dip;
				3311	struct xfs_mount *mp = ip->i_mount;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3312
Christoph Hellwig	579aa9c	2008-04-22 17:34:00 +1000	[diff] [blame]	3313	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL\|XFS_ILOCK_SHARED));
Christoph Hellwig	474fce0	2011-12-18 20:00:09 +0000	[diff] [blame]	3314	ASSERT(xfs_isiflocked(ip));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3315	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE \|\|
Christoph Hellwig	8096b1e	2011-12-18 20:00:07 +0000	[diff] [blame]	3316	ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3317	ASSERT(iip != NULL && iip->ili_fields != 0);
Dave Chinner	263997a	2014-05-20 07:46:40 +1000	[diff] [blame]	3318	ASSERT(ip->i_d.di_version > 1);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3319
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3320	/* set dip = inode's place in the buffer /
Christoph Hellwig	92bfc6e	2008-11-28 14:23:41 +1100	[diff] [blame]	3321	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3322
Christoph Hellwig	69ef921	2011-07-08 14:36:05 +0200	[diff] [blame]	3323	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3324	mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3325	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3326	"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
				3327	__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3328	goto corrupt_out;
				3329	}
				3330	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
				3331	mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3332	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3333	"%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
				3334	__func__, ip->i_ino, ip, ip->i_d.di_magic);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3335	goto corrupt_out;
				3336	}
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	3337	if (S_ISREG(ip->i_d.di_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3338	if (XFS_TEST_ERROR(
				3339	(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
				3340	(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
				3341	mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3342	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3343	"%s: Bad regular inode %Lu, ptr 0x%p",
				3344	__func__, ip->i_ino, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3345	goto corrupt_out;
				3346	}
Al Viro	abbede1	2011-07-26 02:31:30 -0400	[diff] [blame]	3347	} else if (S_ISDIR(ip->i_d.di_mode)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3348	if (XFS_TEST_ERROR(
				3349	(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
				3350	(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
				3351	(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
				3352	mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3353	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3354	"%s: Bad directory inode %Lu, ptr 0x%p",
				3355	__func__, ip->i_ino, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3356	goto corrupt_out;
				3357	}
				3358	}
				3359	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
				3360	ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
				3361	XFS_RANDOM_IFLUSH_5)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3362	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3363	"%s: detected corrupt incore inode %Lu, "
				3364	"total extents = %d, nblocks = %Ld, ptr 0x%p",
				3365	__func__, ip->i_ino,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3366	ip->i_d.di_nextents + ip->i_d.di_anextents,
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3367	ip->i_d.di_nblocks, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3368	goto corrupt_out;
				3369	}
				3370	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
				3371	mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
Dave Chinner	6a19d93	2011-03-07 10:02:35 +1100	[diff] [blame]	3372	xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
				3373	"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
				3374	__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3375	goto corrupt_out;
				3376	}
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3377
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3378	/*
Dave Chinner	263997a	2014-05-20 07:46:40 +1000	[diff] [blame]	3379	* Inode item log recovery for v2 inodes are dependent on the
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3380	* di_flushiter count for correct sequencing. We bump the flush
				3381	* iteration count so we can detect flushes which postdate a log record
				3382	* during recovery. This is redundant as we now log every change and
				3383	* hence this can't happen but we need to still do it to ensure
				3384	* backwards compatibility with old kernels that predate logging all
				3385	* inode changes.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3386	*/
Dave Chinner	e60896d	2013-07-24 15:47:30 +1000	[diff] [blame]	3387	if (ip->i_d.di_version < 3)
				3388	ip->i_d.di_flushiter++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3389
				3390	/*
				3391	* Copy the dirty parts of the inode into the on-disk
				3392	* inode. We always copy out the core of the inode,
				3393	* because if the inode is dirty at all the core must
				3394	* be.
				3395	*/
Christoph Hellwig	81591fe	2008-11-28 14:23:39 +1100	[diff] [blame]	3396	xfs_dinode_to_disk(dip, &ip->i_d);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3397
				3398	/* Wrap, we never let the log put out DI_MAX_FLUSH */
				3399	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
				3400	ip->i_d.di_flushiter = 0;
				3401
Eric Sandeen	fd9fdba	2014-04-14 19:04:46 +1000	[diff] [blame]	3402	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
David Chinner	e4ac967	2008-04-10 12:23:58 +1000	[diff] [blame]	3403	if (XFS_IFORK_Q(ip))
Eric Sandeen	fd9fdba	2014-04-14 19:04:46 +1000	[diff] [blame]	3404	xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3405	xfs_inobp_check(mp, bp);
				3406
				3407	/*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3408	* We've recorded everything logged in the inode, so we'd like to clear
				3409	* the ili_fields bits so we don't log and flush things unnecessarily.
				3410	* However, we can't stop logging all this information until the data
				3411	* we've copied into the disk buffer is written to disk. If we did we
				3412	* might overwrite the copy of the inode in the log with all the data
				3413	* after re-logging only part of it, and in the face of a crash we
				3414	* wouldn't have all the data we need to recover.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3415	*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3416	* What we do is move the bits to the ili_last_fields field. When
				3417	* logging the inode, these bits are moved back to the ili_fields field.
				3418	* In the xfs_iflush_done() routine we clear ili_last_fields, since we
				3419	* know that the information those bits represent is permanently on
				3420	* disk. As long as the flush completes before the inode is logged
				3421	* again, then both ili_fields and ili_last_fields will be cleared.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3422	*
Christoph Hellwig	f5d8d5c	2012-02-29 09:53:54 +0000	[diff] [blame]	3423	* We can play with the ili_fields bits here, because the inode lock
				3424	* must be held exclusively in order to set bits there and the flush
				3425	* lock protects the ili_last_fields bits. Set ili_logged so the flush
				3426	* done routine can tell whether or not to look in the AIL. Also, store
				3427	* the current LSN of the inode so that we can tell whether the item has
				3428	* moved in the AIL from xfs_iflush_done(). In order to read the lsn we
				3429	* need the AIL lock, because it is a 64 bit value that cannot be read
				3430	* atomically.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3431	*/
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3432	iip->ili_last_fields = iip->ili_fields;
				3433	iip->ili_fields = 0;
				3434	iip->ili_logged = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3435
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3436	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
				3437	&iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3438
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3439	/*
				3440	* Attach the function xfs_iflush_done to the inode's
				3441	* buffer. This will remove the inode from the AIL
				3442	* and unlock the inode's flush lock when the inode is
				3443	* completely written to disk.
				3444	*/
				3445	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3446
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3447	/* update the lsn in the on disk inode if required */
				3448	if (ip->i_d.di_version == 3)
				3449	dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3450
Christoph Hellwig	93848a9	2013-04-03 16:11:17 +1100	[diff] [blame]	3451	/* generate the checksum. */
				3452	xfs_dinode_calc_crc(mp, dip);
				3453
				3454	ASSERT(bp->b_fspriv != NULL);
				3455	ASSERT(bp->b_iodone != NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3456	return 0;
				3457
				3458	corrupt_out:
Dave Chinner	2451337	2014-06-25 14:58:08 +1000	[diff] [blame]	3459	return -EFSCORRUPTED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3460	}