md/raid1: submit IO from originating thread instead of md thread. queuing writes to the md thread means that all requests go through the one processor which may not be able to keep up with very high request rates. So use the plugging infrastructure to submit all requests on unplug. If a 'schedule' is needed, we fall back on the old approach of handing the requests to the thread for it to handle. Signed-off-by: NeilBrown <neilb@suse.de>

commit: f54a9d0e59c4bea3db733921ca9147612a6f292c [log] [tgz]
author: NeilBrown <neilb@suse.de> Thu Aug 02 08:33:20 2012 +1000
committer: NeilBrown <neilb@suse.de> Thu Aug 02 08:33:20 2012 +1000
tree: e17aa5d2a7220cfe7466cc4f8c74af5ed672a591
parent: 46a06401f6ba13e59d24746fa9ffa6773b69eee3 [diff]
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03..94e7f6b 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c

@@ -1305,7 +1305,7 @@
 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
 			spin_unlock_irq(&bitmap->counts.lock);
-			io_schedule();
+			schedule();
 			finish_wait(&bitmap->overflow_wait, &__wait);
 			continue;
 		}

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 36a8fc0..9f01870 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -870,6 +870,44 @@
 	pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+						  cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r1conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
@@ -883,6 +921,8 @@
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid1_plug_cb *plug = NULL;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
@@ -1185,11 +1225,22 @@
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
+
+		cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+		if (cb)
+			plug = container_of(cb, struct raid1_plug_cb, cb);
+		else
+			plug = NULL;
 		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
+		if (plug) {
+			bio_list_add(&plug->pending, mbio);
+			plug->pending_cnt++;
+		} else {
+			bio_list_add(&conf->pending_bio_list, mbio);
+			conf->pending_count++;
+		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
+		if (!plug)
 			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
commit	f54a9d0e59c4bea3db733921ca9147612a6f292c	[log] [tgz]
author	NeilBrown <neilb@suse.de>	Thu Aug 02 08:33:20 2012 +1000
committer	NeilBrown <neilb@suse.de>	Thu Aug 02 08:33:20 2012 +1000
tree	e17aa5d2a7220cfe7466cc4f8c74af5ed672a591
parent	46a06401f6ba13e59d24746fa9ffa6773b69eee3 [diff]