md: add proper write-congestion reporting to RAID1 and RAID10.

RAID1 and RAID10 handle write requests by queuing them for handling by
a separate thread.  This is because when a write-intent-bitmap is
active we might need to update the bitmap first, so it is good to
queue a lot of writes, then do one big bitmap update for them all.

However writeback request devices to appear to be congested after a
while so it can make some guesstimate of throughput.  The infinite
queue defeats that (note that RAID5 has already has a finite queue so
it doesn't suffer from this problem).

So impose a limit on the number of pending write requests.  By default
it is 1024 which seems to be generally suitable.  Make it configurable
via module option just in case someone finds a regression.

Signed-off-by: NeilBrown <neilb@suse.de>
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8427ff1..9496463 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -58,6 +58,12 @@
  */
 #define	NR_RAID10_BIOS 256
 
+/* When there are this many requests queue to be written by
+ * the raid10 thread, we become 'congested' to provide back-pressure
+ * for writeback.
+ */
+static int max_queued_requests = 1024;
+
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
 
@@ -681,6 +687,10 @@
 	struct r10conf *conf = mddev->private;
 	int i, ret = 0;
 
+	if ((bits & (1 << BDI_async_congested)) &&
+	    conf->pending_count >= max_queued_requests)
+		return 1;
+
 	if (mddev_congested(mddev, bits))
 		return 1;
 	rcu_read_lock();
@@ -706,10 +716,12 @@
 	if (conf->pending_bio_list.head) {
 		struct bio *bio;
 		bio = bio_list_get(&conf->pending_bio_list);
+		conf->pending_count = 0;
 		spin_unlock_irq(&conf->device_lock);
 		/* flush any pending bitmap writes to disk
 		 * before proceeding w/ I/O */
 		bitmap_unplug(conf->mddev->bitmap);
+		wake_up(&conf->wait_barrier);
 
 		while (bio) { /* submit pending writes */
 			struct bio *next = bio->bi_next;
@@ -996,6 +1008,11 @@
 	/*
 	 * WRITE:
 	 */
+	if (conf->pending_count >= max_queued_requests) {
+		md_wakeup_thread(mddev->thread);
+		wait_event(conf->wait_barrier,
+			   conf->pending_count < max_queued_requests);
+	}
 	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev.  Record them by setting
 	 * bios[x] to bio
@@ -1129,6 +1146,7 @@
 		atomic_inc(&r10_bio->remaining);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
+		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
@@ -3086,3 +3104,5 @@
 MODULE_ALIAS("md-personality-9"); /* RAID10 */
 MODULE_ALIAS("md-raid10");
 MODULE_ALIAS("md-level-10");
+
+module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);