md: avoid deadlock when dirty buffers during md_stop. When the last process closes /dev/mdX sync_blockdev will be called so that all buffers get flushed. So if it is then opened for the STOP_ARRAY ioctl to be sent there will be nothing to flush. However if we open /dev/mdX in order to send the STOP_ARRAY ioctl just moments before some other process which was writing closes their file descriptor, then there won't be a 'last close' and the buffers might not get flushed. So do_md_stop() calls sync_blockdev(). However at this point it is holding ->reconfig_mutex. So if the array is currently 'clean' then the writes from sync_blockdev() will not complete until the array can be marked dirty and that won't happen until some other thread can get ->reconfig_mutex. So we deadlock. We need to move the sync_blockdev() call to before we take ->reconfig_mutex. However then some other thread could open /dev/mdX and write to it after we call sync_blockdev() and before we actually stop the array. This can leave dirty data in the page cache which is awkward. So introduce new flag MD_STILL_CLOSED. Set it before calling sync_blockdev(), clear it if anyone does open the file, and abort the STOP_ARRAY attempt if it gets set before we lock against further opens. It is still possible to get problems if you open /dev/mdX, write to it, then issue the STOP_ARRAY ioctl. Just don't do that. Signed-off-by: NeilBrown <neilb@suse.de>

commit: 260fa034ef7a4ff8b73068b48ac497edd5217491 [log] [tgz]
author: NeilBrown <neilb@suse.de> Tue Aug 27 16:44:13 2013 +1000
committer: NeilBrown <neilb@suse.de> Tue Aug 27 16:45:00 2013 +1000
tree: 6a328502545c8f155b34c08c72f7023166991bce
parent: 7a0a5355cbc71efa430c3730ffbd67ae04abfe31 [diff]
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 084a654..adf4d7e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c

@@ -5337,8 +5337,14 @@
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev)
-		sync_blockdev(bdev);
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
+		 */
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5373,14 +5379,14 @@
 		mutex_unlock(&mddev->open_mutex);
 		return -EBUSY;
 	}
-	if (bdev)
-		/* It is possible IO was issued on some other
-		 * open file which was closed before we took ->open_mutex.
-		 * As that was not the last close __blkdev_put will not
-		 * have called sync_blockdev, so we must.
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
 		 */
-		sync_blockdev(bdev);
-
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
@@ -6417,6 +6423,20 @@
 						 !test_bit(MD_RECOVERY_NEEDED,
 							   &mddev->flags),
 						 msecs_to_jiffies(5000));
+	if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
+		/* Need to flush page cache, and ensure no-one else opens
+		 * and writes
+		 */
+		mutex_lock(&mddev->open_mutex);
+		if (atomic_read(&mddev->openers) > 1) {
+			mutex_unlock(&mddev->open_mutex);
+			err = -EBUSY;
+			goto abort;
+		}
+		set_bit(MD_STILL_CLOSED, &mddev->flags);
+		mutex_unlock(&mddev->open_mutex);
+		sync_blockdev(bdev);
+	}
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO 
@@ -6670,6 +6690,7 @@
 
 	err = 0;
 	atomic_inc(&mddev->openers);
+	clear_bit(MD_STILL_CLOSED, &mddev->flags);
 	mutex_unlock(&mddev->open_mutex);
 
 	check_disk_change(bdev);

diff --git a/drivers/md/md.h b/drivers/md/md.h
index 53283be..608050c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h

@@ -211,6 +211,9 @@
 #define MD_CHANGE_PENDING 2	/* switch from 'clean' to 'active' in progress */
 #define MD_UPDATE_SB_FLAGS (1 | 2 | 4)	/* If these are set, md_update_sb needed */
 #define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
+#define MD_STILL_CLOSED	4	/* If set, then array has not been opened since
+				 * md_ioctl checked on it.
+				 */
 
 	int				suspended;
 	atomic_t			active_io;
commit	260fa034ef7a4ff8b73068b48ac497edd5217491	[log] [tgz]
author	NeilBrown <neilb@suse.de>	Tue Aug 27 16:44:13 2013 +1000
committer	NeilBrown <neilb@suse.de>	Tue Aug 27 16:45:00 2013 +1000
tree	6a328502545c8f155b34c08c72f7023166991bce
parent	7a0a5355cbc71efa430c3730ffbd67ae04abfe31 [diff]