Merge tag 'md-3.3-fixes' of git://neil.brown.name/md Pull md fixes from Neil Brown: "Three fixes for md in 3.3-rc: Two relate to the recently added drive replacement. One fixes the problem where a read error in RAID10 would sometimes be retried indefinitely." * tag 'md-3.3-fixes' of git://neil.brown.name/md: md/raid10: fix assembling of arrays with replacement devices. md/raid10: fix handling of error on last working device in array. md/raid1: fix buglet in md_raid1_contested.

commit: a2e5f13ce806e79b5a234f9653a2a245424e29fa [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Mon Mar 05 16:01:25 2012 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Mon Mar 05 16:01:25 2012 -0800
tree: 06ec6d22ccfa157195565c1777467a3677d6818b
parent: 3e85fb9cd4f711d70c5d26baa86e438390731eab [diff]
parent: 7a90484825680e7831856105f5fef654e6c02701 [diff]
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a368db2..a0b225e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -624,7 +624,7 @@
 		return 1;
 
 	rcu_read_lock();
-	for (i = 0; i < conf->raid_disks; i++) {
+	for (i = 0; i < conf->raid_disks * 2; i++) {
 		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !test_bit(Faulty, &rdev->flags)) {
 			struct request_queue *q = bdev_get_queue(rdev->bdev);

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6e8aa21..58c44d6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c

@@ -67,6 +67,7 @@
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
+static int enough(struct r10conf *conf, int ignore);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -347,6 +348,19 @@
 		 * wait for the 'master' bio.
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
+	} else {
+		/* If all other devices that store this block have
+		 * failed, we want to return the error upwards rather
+		 * than fail the last device.  Here we redefine
+		 * "uptodate" to mean "Don't want to retry"
+		 */
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (!enough(conf, rdev->raid_disk))
+			uptodate = 1;
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	}
+	if (uptodate) {
 		raid_end_bio_io(r10_bio);
 		rdev_dec_pending(rdev, conf->mddev);
 	} else {
@@ -2052,6 +2066,7 @@
 		       "md/raid10:%s: %s: Failing raid device\n",
 		       mdname(mddev), b);
 		md_error(mddev, conf->mirrors[d].rdev);
+		r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 		return;
 	}
 
@@ -2105,8 +2120,11 @@
 				    rdev,
 				    r10_bio->devs[r10_bio->read_slot].addr
 				    + sect,
-				    s, 0))
+				    s, 0)) {
 				md_error(mddev, rdev);
+				r10_bio->devs[r10_bio->read_slot].bio
+					= IO_BLOCKED;
+			}
 			break;
 		}
 
@@ -2299,17 +2317,20 @@
 	 * This is all done synchronously while the array is
 	 * frozen.
 	 */
+	bio = r10_bio->devs[slot].bio;
+	bdevname(bio->bi_bdev, b);
+	bio_put(bio);
+	r10_bio->devs[slot].bio = NULL;
+
 	if (mddev->ro == 0) {
 		freeze_array(conf);
 		fix_read_error(conf, mddev, r10_bio);
 		unfreeze_array(conf);
-	}
+	} else
+		r10_bio->devs[slot].bio = IO_BLOCKED;
+
 	rdev_dec_pending(rdev, mddev);
 
-	bio = r10_bio->devs[slot].bio;
-	bdevname(bio->bi_bdev, b);
-	r10_bio->devs[slot].bio =
-		mddev->ro ? IO_BLOCKED : NULL;
 read_more:
 	rdev = read_balance(conf, r10_bio, &max_sectors);
 	if (rdev == NULL) {
@@ -2318,13 +2339,10 @@
 		       mdname(mddev), b,
 		       (unsigned long long)r10_bio->sector);
 		raid_end_bio_io(r10_bio);
-		bio_put(bio);
 		return;
 	}
 
 	do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
-	if (bio)
-		bio_put(bio);
 	slot = r10_bio->read_slot;
 	printk_ratelimited(
 		KERN_ERR
@@ -2360,7 +2378,6 @@
 			mbio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
 		generic_make_request(bio);
-		bio = NULL;
 
 		r10_bio = mempool_alloc(conf->r10bio_pool,
 					GFP_NOIO);
@@ -3243,7 +3260,6 @@
 			disk->rdev = rdev;
 		}
 
-		disk->rdev = rdev;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
 		/* as we don't honour merge_bvec_fn, we must never risk
commit	a2e5f13ce806e79b5a234f9653a2a245424e29fa	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Mon Mar 05 16:01:25 2012 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Mon Mar 05 16:01:25 2012 -0800
tree	06ec6d22ccfa157195565c1777467a3677d6818b
parent	3e85fb9cd4f711d70c5d26baa86e438390731eab [diff]
parent	7a90484825680e7831856105f5fef654e6c02701 [diff]