blk-mq: don't allocate driver tag upfront for flush rq
The idea behind it is simple:
1) for none scheduler, driver tag has to be borrowed for flush rq,
otherwise we may run out of tag, and that causes an IO hang. And
get/put driver tag is actually noop for none, so reordering tags
isn't necessary at all.
2) for a real I/O scheduler, we need not allocate a driver tag upfront
for flush rq. It works just fine to follow the same approach as
normal requests: allocate driver tag for each rq just before calling
->queue_rq().
One driver visible change is that the driver tag isn't shared in the
flush request sequence. That won't be a problem, since we always do that
in legacy path.
Then flush rq need not be treated specially wrt. get/put driver tag.
This cleans up the code - for instance, reorder_tags_to_front() can be
removed, and we needn't worry about request ordering in dispatch list
for avoiding I/O deadlock.
Also we have to put the driver tag before requeueing.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a9773d2..f171706 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -231,8 +231,13 @@
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
- flush_rq->tag = -1;
+ if (!q->elevator) {
+ blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+ flush_rq->tag = -1;
+ } else {
+ blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+ flush_rq->internal_tag = -1;
+ }
}
running = &fq->flush_queue[fq->flush_running_idx];
@@ -318,19 +323,26 @@
blk_rq_init(q, flush_rq);
/*
- * Borrow tag from the first request since they can't
- * be in flight at the same time. And acquire the tag's
- * ownership for flush req.
+ * In case of none scheduler, borrow tag from the first request
+ * since they can't be in flight at the same time. And acquire
+ * the tag's ownership for flush req.
+ *
+ * In case of IO scheduler, flush rq need to borrow scheduler tag
+ * just for cheating put/get driver tag.
*/
if (q->mq_ops) {
struct blk_mq_hw_ctx *hctx;
flush_rq->mq_ctx = first_rq->mq_ctx;
- flush_rq->tag = first_rq->tag;
- fq->orig_rq = first_rq;
- hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+ if (!q->elevator) {
+ fq->orig_rq = first_rq;
+ flush_rq->tag = first_rq->tag;
+ hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+ blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+ } else {
+ flush_rq->internal_tag = first_rq->internal_tag;
+ }
}
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -394,6 +406,11 @@
hctx = blk_mq_map_queue(q, ctx->cpu);
+ if (q->elevator) {
+ WARN_ON(rq->tag < 0);
+ blk_mq_put_driver_tag_hctx(hctx, rq);
+ }
+
/*
* After populating an empty queue, kick it to avoid stall. Read
* the comment in flush_end_io().