[XFS] Fix race when looking up reclaimable inodes If we get a race looking up a reclaimable inode, we can end up with the winner proceeding to use the inode before it has been completely re-initialised. This is a Bad Thing. Fix the race by checking whether we are still initialising the inode once we have a reference to it, and if so wait for the initialisation to complete before continuing. While there, fix a leaked reference count in the same code when encountering an unlinked inode and we are not doing a lookup for a create operation. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32429a Signed-off-by: David Chinner <david@fromorbit.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit: 6bfb3d065f4c498c17a3a07f3dc08cedff53aff4 [log] [tgz]
author: David Chinner <david@fromorbit.com> Thu Oct 30 18:32:43 2008 +1100
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> Thu Oct 30 18:32:43 2008 +1100
tree: c5c528c77e44584616a175e0dcc89713e7b76d0a
parent: e0b8e8b65d578f5d5538465dff8392cf02e1cc5d [diff]
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 2147176..77d6ddc 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h

@@ -77,6 +77,7 @@
 #include <linux/spinlock.h>
 #include <linux/random.h>
 #include <linux/ctype.h>
+#include <linux/writeback.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 837cae7..bf4dc5e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c

@@ -52,7 +52,7 @@
 	int			lock_flags) __releases(pag->pag_ici_lock)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	int			error = 0;
+	int			error = EAGAIN;
 
 	/*
 	 * If INEW is set this inode is being set up
@@ -60,7 +60,6 @@
 	 * Pause and try again.
 	 */
 	if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
-		error = EAGAIN;
 		XFS_STATS_INC(xs_ig_frecycle);
 		goto out_error;
 	}
@@ -73,7 +72,6 @@
 		 * error immediately so we don't remove it from the reclaim
 		 * list and potentially leak the inode.
 		 */
-
 		if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
 			error = ENOENT;
 			goto out_error;
@@ -91,27 +89,42 @@
 			error = ENOMEM;
 			goto out_error;
 		}
+
+		/*
+		 * We must set the XFS_INEW flag before clearing the
+		 * XFS_IRECLAIMABLE flag so that if a racing lookup does
+		 * not find the XFS_IRECLAIMABLE above but has the igrab()
+		 * below succeed we can safely check XFS_INEW to detect
+		 * that this inode is still being initialised.
+		 */
 		xfs_iflags_set(ip, XFS_INEW);
 		xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 
 		/* clear the radix tree reclaim flag as well. */
 		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
-		read_unlock(&pag->pag_ici_lock);
 	} else if (!igrab(VFS_I(ip))) {
 		/* If the VFS inode is being torn down, pause and try again. */
-		error = EAGAIN;
 		XFS_STATS_INC(xs_ig_frecycle);
 		goto out_error;
-	} else {
-		/* we've got a live one */
-		read_unlock(&pag->pag_ici_lock);
+	} else if (xfs_iflags_test(ip, XFS_INEW)) {
+		/*
+		 * We are racing with another cache hit that is
+		 * currently recycling this inode out of the XFS_IRECLAIMABLE
+		 * state. Wait for the initialisation to complete before
+		 * continuing.
+		 */
+		wait_on_inode(VFS_I(ip));
 	}
 
 	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
 		error = ENOENT;
-		goto out;
+		iput(VFS_I(ip));
+		goto out_error;
 	}
 
+	/* We've got a live one. */
+	read_unlock(&pag->pag_ici_lock);
+
 	if (lock_flags != 0)
 		xfs_ilock(ip, lock_flags);
 
@@ -122,7 +135,6 @@
 
 out_error:
 	read_unlock(&pag->pag_ici_lock);
-out:
 	return error;
 }
commit	6bfb3d065f4c498c17a3a07f3dc08cedff53aff4	[log] [tgz]
author	David Chinner <david@fromorbit.com>	Thu Oct 30 18:32:43 2008 +1100
committer	Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>	Thu Oct 30 18:32:43 2008 +1100
tree	c5c528c77e44584616a175e0dcc89713e7b76d0a
parent	e0b8e8b65d578f5d5538465dff8392cf02e1cc5d [diff]