[DLM] fix requestqueue race

Red Hat BZ 211914

There's a race between dlm_recoverd (1) enabling locking and (2) clearing
out the requestqueue, and dlm_recvd (1) checking if locking is enabled and
(2) adding a message to the requestqueue.  An ordering of recoverd(1),
recvd(1), recvd(2), recoverd(2) results in a message being left on the
requestqueue.  The fix is to have dlm_recvd check whether dlm_recoverd has
enabled locking after taking the requestqueue mutex, and if it has, to
process the message instead of queueing it.  dlm_add_requestqueue() now
returns -EAGAIN in that case so the caller can process the message itself.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
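
For context, here is a rough caller-side sketch (not part of this patch) of
how the receive path can consume the new return value.  The loop and the
dlm_dispatch_message() stand-in are illustrative assumptions, not the actual
fs/dlm code:

/* Illustrative sketch only: dlm_dispatch_message() is a hypothetical
   stand-in for the normal message-processing path. */
static void receive_message_sketch(struct dlm_ls *ls, int nodeid,
				   struct dlm_header *hd)
{
	for (;;) {
		if (!dlm_locking_stopped(ls)) {
			/* Locking enabled: process the message normally. */
			dlm_dispatch_message(ls, nodeid, hd);
			return;
		}

		/* Recovery appears to be in progress; try to park the
		   message on the requestqueue.  -EAGAIN means dlm_recoverd
		   enabled locking between our check above and the mutex
		   acquisition inside dlm_add_requestqueue(), so loop and
		   process the message instead of leaving it queued. */
		if (dlm_add_requestqueue(ls, nodeid, hd) != -EAGAIN)
			return;
	}
}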
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089..0226d2a 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,39 @@
  * lockspace is enabled on some while still suspended on others.
  */
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 {
 	struct rq_entry *e;
 	int length = hd->h_length;
+	int rv = 0;
 
 	if (dlm_is_removed(ls, nodeid))
-		return;
+		return 0;
 
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
-		return;
+		return 0;
 	}
 
 	e->nodeid = nodeid;
 	memcpy(e->request, hd, length);
 
+	/* We need to check dlm_locking_stopped() after taking the mutex to
+	   avoid a race where dlm_recoverd enables locking and runs
+	   process_requestqueue between our earlier dlm_locking_stopped check
+	   and this addition to the requestqueue. */
+
 	mutex_lock(&ls->ls_requestqueue_mutex);
-	list_add_tail(&e->list, &ls->ls_requestqueue);
+	if (dlm_locking_stopped(ls))
+		list_add_tail(&e->list, &ls->ls_requestqueue);
+	else {
+		log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+		kfree(e);
+		rv = -EAGAIN;
+	}
 	mutex_unlock(&ls->ls_requestqueue_mutex);
+	return rv;
 }
 
 int dlm_process_requestqueue(struct dlm_ls *ls)