blk-mq: add a 'list' parameter to ->queue_rq()

Since we have the notion of a 'last' request in a chain, we can use
this to have the hardware optimize the issuing of requests. Add
a list_head parameter to queue_rq that the driver can use to
temporarily store hw commands for issue when 'last' is true. If we
are doing a chain of requests, pass in a NULL list for the first
request to force issue of that immediately, then batch the remainder
for deferred issue until the last request has been sent.

Instead of adding yet another argument to the hot ->queue_rq path,
encapsulate the passed arguments in a blk_mq_queue_data structure.
This is passed as a constant, and has been tested as faster than
passing 4 (or even 3) args through ->queue_rq. Update drivers for
the new ->queue_rq() prototype. There are no functional changes
in this patch for drivers - if they don't use the passed in list,
then they will just queue requests individually like before.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 1bd5f52..3bd7ca9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3775,9 +3775,10 @@
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *rq = bd->rq;
 	int ret;
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 2671a3f..8433bc8 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -313,15 +313,15 @@
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
-	cmd->rq = rq;
+	cmd->rq = bd->rq;
 	cmd->nq = hctx->driver_data;
 
-	blk_mq_start_request(rq);
+	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
 	return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c6a27d5..cecd3f9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -158,10 +158,11 @@
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct request *req = bd->rq;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
@@ -222,7 +223,7 @@
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
+	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
 		notify = true;
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9eff8a3..161dcc9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1858,9 +1858,10 @@
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *req = bd->rq;
 	struct request_queue *q = req->q;
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;