ocfs2: fix rare stale inode errors when exporting via nfs For nfs exporting, ocfs2_get_dentry() returns the dentry for fh. ocfs2_get_dentry() may read from disk when the inode is not in memory, without any cross cluster lock. this leads to the file system loading a stale inode. This patch fixes above problem. Solution is that in case of inode is not in memory, we get the cluster lock(PR) of alloc inode where the inode in question is allocated from (this causes node on which deletion is done sync the alloc inode) before reading out the inode itsself. then we check the bitmap in the group (the inode in question allcated from) to see if the bit is clear. if it's clear then it's stale. if the bit is set, we then check generation as the existing code does. We have to read out the inode in question from disk first to know its alloc slot and allot bit. And if its not stale we read it out using ocfs2_iget(). The second read should then be from cache. And also we have to add a per superblock nfs_sync_lock to cover the lock for alloc inode and that for inode in question. this is because ocfs2_get_dentry() and ocfs2_delete_inode() lock on them in reverse order. nfs_sync_lock is locked in EX mode in ocfs2_get_dentry() and in PR mode in ocfs2_delete_inode(). so that mutliple ocfs2_delete_inode() can run concurrently in normal case. [mfasheh@suse.com: build warning fixes and comment cleanups] Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> Acked-by: Joel Becker <joel.becker@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>

commit: 6ca497a83e592d64e050c4d04b6dedb8c915f39a [log] [tgz]
author: wengang wang <wen.gang.wang@oracle.com> Fri Mar 06 21:29:10 2009 +0800
committer: Mark Fasheh <mfasheh@suse.com> Fri Apr 03 11:39:25 2009 -0700
tree: 0b9cd611d6d907881841eca73d12a7f3b85f1716
parent: 9405dccfd3201d2b76e120949bec81ba8cfbd2d0 [diff] [blame]
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7219a86..e15fc7d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c

@@ -244,6 +244,10 @@
 	.flags		= 0,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+	.flags		= 0,
+};
+
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.get_osb	= ocfs2_get_dentry_osb,
 	.post_unlock	= ocfs2_dentry_post_unlock,
@@ -622,6 +626,17 @@
 				   &ocfs2_rename_lops, osb);
 }
 
+static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
+					 struct ocfs2_super *osb)
+{
+	/* nfs_sync lockres doesn't come from a slab so we call init
+	 * once on it manually.  */
+	ocfs2_lock_res_init_once(res);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
+	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
+				   &ocfs2_nfs_sync_lops, osb);
+}
+
 void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_file_private *fp)
 {
@@ -2417,6 +2432,34 @@
 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
 }
 
+int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
+{
+	int status;
+	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
+
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	if (ocfs2_mount_local(osb))
+		return 0;
+
+	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
+				    0, 0);
+	if (status < 0)
+		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
+
+	return status;
+}
+
+void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
+{
+	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
+
+	if (!ocfs2_mount_local(osb))
+		ocfs2_cluster_unlock(osb, lockres,
+				     ex ? LKM_EXMODE : LKM_PRMODE);
+}
+
 int ocfs2_dentry_lock(struct dentry *dentry, int ex)
 {
 	int ret;
@@ -2798,6 +2841,7 @@
 local:
 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
+	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
 
 	osb->cconn = conn;
 
@@ -2833,6 +2877,7 @@
 
 	ocfs2_lock_res_free(&osb->osb_super_lockres);
 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
+	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
 
 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
 	osb->cconn = NULL;
@@ -3015,6 +3060,7 @@
 {
 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
+	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
 }
 
 int ocfs2_drop_inode_locks(struct inode *inode)
commit	6ca497a83e592d64e050c4d04b6dedb8c915f39a	[log] [tgz]
author	wengang wang <wen.gang.wang@oracle.com>	Fri Mar 06 21:29:10 2009 +0800
committer	Mark Fasheh <mfasheh@suse.com>	Fri Apr 03 11:39:25 2009 -0700
tree	0b9cd611d6d907881841eca73d12a7f3b85f1716
parent	9405dccfd3201d2b76e120949bec81ba8cfbd2d0 [diff] [blame]