dm: move deferred bio flushing to workqueue

Add a single-threaded workqueue to each mapped device and move the
flushing of the pushback and deferred bio lists onto this new
workqueue.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
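
For reference, the pattern the patch adopts is: a work item is placed on
the caller's stack, queued to the device's single-threaded workqueue, and
flush_workqueue() is then used to wait until it has run, so the stack
storage stays valid for the lifetime of the work. Below is a minimal,
self-contained sketch of that pattern; the names flush_request, do_flush,
queue_and_wait and example_wq are illustrative only and do not appear in
the patch, which uses dm_wq_req, dm_wq_work, dm_queue_flush and md->wq
in the same way.

/*
 * Illustrative sketch only: a stack-allocated work item handed to a
 * dedicated single-threaded workqueue, then waited for with
 * flush_workqueue().  All names here are hypothetical.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

struct flush_request {
	struct work_struct work;
	int type;				/* what to flush */
};

static struct workqueue_struct *example_wq;	/* stands in for md->wq */

static void do_flush(struct work_struct *work)
{
	struct flush_request *req = container_of(work, struct flush_request,
						 work);

	printk(KERN_INFO "flushing, type %d\n", req->type);
}

/* Mirrors the shape of dm_queue_flush(): queue the request, then wait. */
static void queue_and_wait(int type)
{
	struct flush_request req;		/* lives on the caller's stack */

	req.type = type;
	INIT_WORK(&req.work, do_flush);
	queue_work(example_wq, &req.work);

	/* 'req' must stay valid until the work has run, hence the wait. */
	flush_workqueue(example_wq);
}

static int __init example_init(void)
{
	example_wq = create_singlethread_workqueue("example_flush");
	if (!example_wq)
		return -ENOMEM;

	queue_and_wait(0);
	return 0;
}

static void __exit example_exit(void)
{
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");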
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9ca012e..6617ce4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -71,6 +71,19 @@
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 
+/*
+ * Work processed by per-device workqueue.
+ */
+struct dm_wq_req {
+	enum {
+		DM_WQ_FLUSH_ALL,
+		DM_WQ_FLUSH_DEFERRED,
+	} type;
+	struct work_struct work;
+	struct mapped_device *md;
+	void *context;
+};
+
 struct mapped_device {
 	struct rw_semaphore io_lock;
 	struct mutex suspend_lock;
@@ -96,6 +109,11 @@
 	struct bio_list pushback;
 
 	/*
+	 * Processing queue (flush/barriers)
+	 */
+	struct workqueue_struct *wq;
+
+	/*
 	 * The current mapping.
 	 */
 	struct dm_table *map;
@@ -1044,6 +1062,10 @@
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
+	md->wq = create_singlethread_workqueue("kdmflush");
+	if (!md->wq)
+		goto bad_thread;
+
 	/* Populate the mapping, nobody knows we exist yet */
 	spin_lock(&_minor_lock);
 	old_md = idr_replace(&_minor_idr, md, minor);
@@ -1053,6 +1075,8 @@
 
 	return md;
 
+bad_thread:
+	put_disk(md->disk);
 bad_disk:
 	bioset_free(md->bs);
 bad_no_bioset:
@@ -1080,6 +1104,7 @@
 		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
+	destroy_workqueue(md->wq);
 	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
 	bioset_free(md->bs);
@@ -1308,6 +1333,44 @@
 	spin_unlock_irqrestore(&md->pushback_lock, flags);
 }
 
+static void dm_wq_work(struct work_struct *work)
+{
+	struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
+	struct mapped_device *md = req->md;
+
+	down_write(&md->io_lock);
+	switch (req->type) {
+	case DM_WQ_FLUSH_ALL:
+		__merge_pushback_list(md);
+		/* pass through */
+	case DM_WQ_FLUSH_DEFERRED:
+		__flush_deferred_io(md);
+		break;
+	default:
+		DMERR("dm_wq_work: unrecognised work type %d", req->type);
+		BUG();
+	}
+	up_write(&md->io_lock);
+}
+
+static void dm_wq_queue(struct mapped_device *md, int type, void *context,
+			struct dm_wq_req *req)
+{
+	req->type = type;
+	req->md = md;
+	req->context = context;
+	INIT_WORK(&req->work, dm_wq_work);
+	queue_work(md->wq, &req->work);
+}
+
+static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+{
+	struct dm_wq_req req;
+
+	dm_wq_queue(md, type, context, &req);
+	flush_workqueue(md->wq);
+}
+
 /*
  * Swap in a new table (destroying old one).
  */
@@ -1450,9 +1513,7 @@
 
 	/* were we interrupted ? */
 	if (r < 0) {
-		down_write(&md->io_lock);
-		__flush_deferred_io(md);
-		up_write(&md->io_lock);
+		dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
 		unlock_fs(md);
 		goto out; /* pushback list is already flushed, so skip flush */
@@ -1463,16 +1524,12 @@
 	set_bit(DMF_SUSPENDED, &md->flags);
 
 flush_and_out:
-	if (r && noflush) {
+	if (r && noflush)
 		/*
 		 * Because there may be already I/Os in the pushback list,
 		 * flush them before return.
 		 */
-		down_write(&md->io_lock);
-		__merge_pushback_list(md);
-		__flush_deferred_io(md);
-		up_write(&md->io_lock);
-	}
+		dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);
 
 out:
 	if (r && md->suspended_bdev) {
@@ -1504,9 +1561,7 @@
 	if (r)
 		goto out;
 
-	down_write(&md->io_lock);
-	__flush_deferred_io(md);
-	up_write(&md->io_lock);
+	dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
 
 	unlock_fs(md);