Blame - block/blk-mq-sched.c - kernel/msm-4.19

blob: 55c0a745b4277ac86caa1b8eac68d48ccf6d9be9 [file] [log] [blame]

Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	1	/*
				2	* blk-mq scheduling framework
				3	*
				4	* Copyright (C) 2016 Jens Axboe
				5	*/
				6	#include <linux/kernel.h>
				7	#include <linux/module.h>
				8	#include <linux/blk-mq.h>
				9
				10	#include <trace/events/block.h>
				11
				12	#include "blk.h"
				13	#include "blk-mq.h"
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	14	#include "blk-mq-debugfs.h"
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	15	#include "blk-mq-sched.h"
				16	#include "blk-mq-tag.h"
				17	#include "blk-wbt.h"
				18
				19	void blk_mq_sched_free_hctx_data(struct request_queue *q,
				20	void (exit)(struct blk_mq_hw_ctx ))
				21	{
				22	struct blk_mq_hw_ctx *hctx;
				23	int i;
				24
				25	queue_for_each_hw_ctx(q, hctx, i) {
				26	if (exit && hctx->sched_data)
				27	exit(hctx);
				28	kfree(hctx->sched_data);
				29	hctx->sched_data = NULL;
				30	}
				31	}
				32	EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
				33
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	34	void blk_mq_sched_assign_ioc(struct request rq, struct bio bio)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	35	{
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	36	struct request_queue *q = rq->q;
				37	struct io_context *ioc = rq_ioc(bio);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	38	struct io_cq *icq;
				39
				40	spin_lock_irq(q->queue_lock);
				41	icq = ioc_lookup_icq(ioc, q);
				42	spin_unlock_irq(q->queue_lock);
				43
				44	if (!icq) {
				45	icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
				46	if (!icq)
				47	return;
				48	}
Christoph Hellwig	ea511e3	2017-06-16 18:15:20 +0200	[diff] [blame]	49	get_io_context(icq->ioc);
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	50	rq->elv.icq = icq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	51	}
				52
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	53	/*
				54	* Mark a hardware queue as needing a restart. For shared queues, maintain
				55	* a count of how many hardware queues are marked for restart.
				56	*/
				57	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
				58	{
				59	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				60	return;
				61
				62	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				63	struct request_queue *q = hctx->queue;
				64
				65	if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				66	atomic_inc(&q->shared_hctx_restart);
				67	} else
				68	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				69	}
				70
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	71	static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	72	{
				73	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	74	return false;
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	75
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	76	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				77	struct request_queue *q = hctx->queue;
				78
				79	if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				80	atomic_dec(&q->shared_hctx_restart);
				81	} else
				82	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	83
Jens Axboe	79f720a	2017-11-10 09:13:21 -0700	[diff] [blame]	84	return blk_mq_run_hw_queue(hctx, true);
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	85	}
				86
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	87	/*
				88	* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
				89	* its queue by itself in its completion handler, so we don't need to
				90	* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
				91	*/
				92	static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	93	{
				94	struct request_queue *q = hctx->queue;
				95	struct elevator_queue *e = q->elevator;
				96	LIST_HEAD(rq_list);
				97
				98	do {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	99	struct request *rq;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	100
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	101	if (e->type->ops.mq.has_work &&
				102	!e->type->ops.mq.has_work(hctx))
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	103	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	104
Ming Lei	88022d7	2017-11-05 02:21:12 +0800	[diff] [blame]	105	if (!blk_mq_get_dispatch_budget(hctx))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	106	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	107
				108	rq = e->type->ops.mq.dispatch_request(hctx);
				109	if (!rq) {
				110	blk_mq_put_dispatch_budget(hctx);
				111	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	112	}
				113
				114	/*
				115	* Now this rq owns the budget which has to be released
				116	* if this rq won't be queued to driver via .queue_rq()
				117	* in blk_mq_dispatch_rq_list().
				118	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	119	list_add(&rq->queuelist, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	120	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	121	}
				122
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	123	static struct blk_mq_ctx blk_mq_next_ctx(struct blk_mq_hw_ctx hctx,
				124	struct blk_mq_ctx *ctx)
				125	{
				126	unsigned idx = ctx->index_hw;
				127
				128	if (++idx == hctx->nr_ctx)
				129	idx = 0;
				130
				131	return hctx->ctxs[idx];
				132	}
				133
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	134	/*
				135	* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
				136	* its queue by itself in its completion handler, so we don't need to
				137	* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
				138	*/
				139	static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	140	{
				141	struct request_queue *q = hctx->queue;
				142	LIST_HEAD(rq_list);
				143	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
				144
				145	do {
				146	struct request *rq;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	147
				148	if (!sbitmap_any_bit_set(&hctx->ctx_map))
				149	break;
				150
Ming Lei	88022d7	2017-11-05 02:21:12 +0800	[diff] [blame]	151	if (!blk_mq_get_dispatch_budget(hctx))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	152	break;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	153
				154	rq = blk_mq_dequeue_from_ctx(hctx, ctx);
				155	if (!rq) {
				156	blk_mq_put_dispatch_budget(hctx);
				157	break;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	158	}
				159
				160	/*
				161	* Now this rq owns the budget which has to be released
				162	* if this rq won't be queued to driver via .queue_rq()
				163	* in blk_mq_dispatch_rq_list().
				164	*/
				165	list_add(&rq->queuelist, &rq_list);
				166
				167	/* round robin for fair dispatch */
				168	ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
				169
				170	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
				171
				172	WRITE_ONCE(hctx->dispatch_from, ctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	173	}
				174
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	175	void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	176	{
Omar Sandoval	81380ca	2017-04-07 08:56:26 -0600	[diff] [blame]	177	struct request_queue *q = hctx->queue;
				178	struct elevator_queue *e = q->elevator;
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	179	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	180	LIST_HEAD(rq_list);
				181
Ming Lei	f4560ff	2017-06-18 14:24:27 -0600	[diff] [blame]	182	/* RCU or SRCU read lock is needed before checking quiesced flag */
				183	if (unlikely(blk_mq_hctx_stopped(hctx) \|\| blk_queue_quiesced(q)))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	184	return;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	185
				186	hctx->run++;
				187
				188	/*
				189	* If we have previous entries on our dispatch list, grab them first for
				190	* more fair dispatch.
				191	*/
				192	if (!list_empty_careful(&hctx->dispatch)) {
				193	spin_lock(&hctx->lock);
				194	if (!list_empty(&hctx->dispatch))
				195	list_splice_init(&hctx->dispatch, &rq_list);
				196	spin_unlock(&hctx->lock);
				197	}
				198
				199	/*
				200	* Only ask the scheduler for requests, if we didn't have residual
				201	* requests from the dispatch list. This is to avoid the case where
				202	* we only ever dispatch a fraction of the requests available because
				203	* of low device queue depth. Once we pull requests out of the IO
				204	* scheduler, we can no longer merge or sort them. So it's best to
				205	* leave them there for as long as we can. Mark the hw queue as
				206	* needing a restart in that case.
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	207	*
Ming Lei	5e3d02b	2017-10-14 17:22:25 +0800	[diff] [blame]	208	* We want to dispatch from the scheduler if there was nothing
				209	* on the dispatch list or we were able to dispatch from the
				210	* dispatch list.
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	211	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	212	if (!list_empty(&rq_list)) {
				213	blk_mq_sched_mark_restart_hctx(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	214	if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
				215	if (has_sched_dispatch)
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	216	blk_mq_do_dispatch_sched(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	217	else
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	218	blk_mq_do_dispatch_ctx(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	219	}
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	220	} else if (has_sched_dispatch) {
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	221	blk_mq_do_dispatch_sched(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	222	} else if (q->mq_ops->get_budget) {
				223	/*
				224	* If we need to get budget before queuing request, we
				225	* dequeue request one by one from sw queue for avoiding
				226	* to mess up I/O merge when dispatch runs out of resource.
				227	*
				228	* TODO: get more budgets, and dequeue more requests in
				229	* one time.
				230	*/
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	231	blk_mq_do_dispatch_ctx(hctx);
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	232	} else {
				233	blk_mq_flush_busy_ctxs(hctx, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	234	blk_mq_dispatch_rq_list(q, &rq_list, false);
Jens Axboe	c13660a	2017-01-26 12:40:07 -0700	[diff] [blame]	235	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	236	}
				237
Jens Axboe	e4d750c	2017-02-03 09:48:28 -0700	[diff] [blame]	238	bool blk_mq_sched_try_merge(struct request_queue q, struct bio bio,
				239	struct request **merged_request)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	240	{
				241	struct request *rq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	242
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	243	switch (elv_merge(q, &rq, bio)) {
				244	case ELEVATOR_BACK_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	245	if (!blk_mq_sched_allow_merge(q, rq, bio))
				246	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	247	if (!bio_attempt_back_merge(q, rq, bio))
				248	return false;
				249	*merged_request = attempt_back_merge(q, rq);
				250	if (!*merged_request)
				251	elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
				252	return true;
				253	case ELEVATOR_FRONT_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	254	if (!blk_mq_sched_allow_merge(q, rq, bio))
				255	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	256	if (!bio_attempt_front_merge(q, rq, bio))
				257	return false;
				258	*merged_request = attempt_front_merge(q, rq);
				259	if (!*merged_request)
				260	elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
				261	return true;
				262	default:
				263	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	264	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	265	}
				266	EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
				267
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	268	/*
				269	* Reverse check our software queue for entries that we could potentially
				270	* merge with. Currently includes a hand-wavy stop count of 8, to not spend
				271	* too much time checking for merges.
				272	*/
				273	static bool blk_mq_attempt_merge(struct request_queue *q,
				274	struct blk_mq_ctx ctx, struct bio bio)
				275	{
				276	struct request *rq;
				277	int checked = 8;
				278
Bart Van Assche	7b60781	2017-06-20 11:15:47 -0700	[diff] [blame]	279	lockdep_assert_held(&ctx->lock);
				280
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	281	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
				282	bool merged = false;
				283
				284	if (!checked--)
				285	break;
				286
				287	if (!blk_rq_merge_ok(rq, bio))
				288	continue;
				289
				290	switch (blk_try_merge(rq, bio)) {
				291	case ELEVATOR_BACK_MERGE:
				292	if (blk_mq_sched_allow_merge(q, rq, bio))
				293	merged = bio_attempt_back_merge(q, rq, bio);
				294	break;
				295	case ELEVATOR_FRONT_MERGE:
				296	if (blk_mq_sched_allow_merge(q, rq, bio))
				297	merged = bio_attempt_front_merge(q, rq, bio);
				298	break;
				299	case ELEVATOR_DISCARD_MERGE:
				300	merged = bio_attempt_discard_merge(q, rq, bio);
				301	break;
				302	default:
				303	continue;
				304	}
				305
				306	if (merged)
				307	ctx->rq_merged++;
				308	return merged;
				309	}
				310
				311	return false;
				312	}
				313
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	314	bool __blk_mq_sched_bio_merge(struct request_queue q, struct bio bio)
				315	{
				316	struct elevator_queue *e = q->elevator;
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	317	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
				318	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				319	bool ret = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	320
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	321	if (e && e->type->ops.mq.bio_merge) {
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	322	blk_mq_put_ctx(ctx);
				323	return e->type->ops.mq.bio_merge(hctx, bio);
				324	}
				325
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	326	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
				327	/* default per sw-queue merge */
				328	spin_lock(&ctx->lock);
				329	ret = blk_mq_attempt_merge(q, ctx, bio);
				330	spin_unlock(&ctx->lock);
				331	}
				332
				333	blk_mq_put_ctx(ctx);
				334	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	335	}
				336
				337	bool blk_mq_sched_try_insert_merge(struct request_queue q, struct request rq)
				338	{
				339	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
				340	}
				341	EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
				342
				343	void blk_mq_sched_request_inserted(struct request *rq)
				344	{
				345	trace_block_rq_insert(rq->q, rq);
				346	}
				347	EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
				348
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	349	static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	350	bool has_sched,
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	351	struct request *rq)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	352	{
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	353	/* dispatch flush rq directly */
				354	if (rq->rq_flags & RQF_FLUSH_SEQ) {
				355	spin_lock(&hctx->lock);
				356	list_add(&rq->queuelist, &hctx->dispatch);
				357	spin_unlock(&hctx->lock);
				358	return true;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	359	}
				360
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	361	if (has_sched)
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	362	rq->rq_flags \|= RQF_SORTED;
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	363
				364	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	365	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	366
/**
 * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
 * @pos:    loop cursor.
 * @skip:   the list element that will not be examined. Iteration starts at
 *          @skip->next.
 * @head:   head of the list to examine. This list must have at least one
 *          element, namely @skip.
 * @member: name of the list_head structure within typeof(*pos).
 *
 * The walk wraps around: when the cursor's successor is @head itself, the
 * head is stepped over and the walk continues with the entry after it. The
 * loop ends as soon as the cursor is back at @skip, so the body never runs
 * for @skip. Every use of @pos is parenthesized so that an expression
 * argument expands safely.
 */
#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
	for ((pos) = (skip);						\
	     ((pos) = ((pos)->member.next != (head)) ?			\
		list_entry_rcu((pos)->member.next,			\
			       typeof(*(pos)), member) :		\
		list_entry_rcu((pos)->member.next->next,		\
			       typeof(*(pos)), member)),		\
	     (pos) != (skip); )
				382
				383	/*
				384	* Called after a driver tag has been freed to check whether a hctx needs to
				385	* be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
				386	* queues in a round-robin fashion if the tag set of @hctx is shared with other
				387	* hardware queues.
				388	*/
				389	void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
				390	{
				391	struct blk_mq_tags *const tags = hctx->tags;
				392	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
				393	struct request_queue const queue = hctx->queue, q;
				394	struct blk_mq_hw_ctx *hctx2;
				395	unsigned int i, j;
				396
				397	if (set->flags & BLK_MQ_F_TAG_SHARED) {
				398	/*
				399	* If this is 0, then we know that no hardware queues
				400	* have RESTART marked. We're done.
				401	*/
				402	if (!atomic_read(&queue->shared_hctx_restart))
				403	return;
				404
				405	rcu_read_lock();
				406	list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
				407	tag_set_list) {
				408	queue_for_each_hw_ctx(q, hctx2, i)
				409	if (hctx2->tags == tags &&
				410	blk_mq_sched_restart_hctx(hctx2))
				411	goto done;
				412	}
				413	j = hctx->queue_num + 1;
				414	for (i = 0; i < queue->nr_hw_queues; i++, j++) {
				415	if (j == queue->nr_hw_queues)
				416	j = 0;
				417	hctx2 = queue->queue_hw_ctx[j];
				418	if (hctx2->tags == tags &&
				419	blk_mq_sched_restart_hctx(hctx2))
				420	break;
				421	}
				422	done:
				423	rcu_read_unlock();
				424	} else {
				425	blk_mq_sched_restart_hctx(hctx);
				426	}
				427	}
				428
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	429	void blk_mq_sched_insert_request(struct request *rq, bool at_head,
Mike Snitzer	9e97d29	2018-01-17 11:25:58 -0500	[diff] [blame]	430	bool run_queue, bool async)
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	431	{
				432	struct request_queue *q = rq->q;
				433	struct elevator_queue *e = q->elevator;
				434	struct blk_mq_ctx *ctx = rq->mq_ctx;
				435	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				436
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	437	/* flush rq in flush machinery need to be dispatched directly */
				438	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	439	blk_insert_flush(rq);
				440	goto run;
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	441	}
				442
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	443	WARN_ON(e && (rq->tag != -1));
				444
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	445	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	446	goto run;
				447
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	448	if (e && e->type->ops.mq.insert_requests) {
				449	LIST_HEAD(list);
				450
				451	list_add(&rq->queuelist, &list);
				452	e->type->ops.mq.insert_requests(hctx, &list, at_head);
				453	} else {
				454	spin_lock(&ctx->lock);
				455	__blk_mq_insert_request(hctx, rq, at_head);
				456	spin_unlock(&ctx->lock);
				457	}
				458
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	459	run:
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	460	if (run_queue)
				461	blk_mq_run_hw_queue(hctx, async);
				462	}
				463
				464	void blk_mq_sched_insert_requests(struct request_queue *q,
				465	struct blk_mq_ctx *ctx,
				466	struct list_head *list, bool run_queue_async)
				467	{
				468	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				469	struct elevator_queue *e = hctx->queue->elevator;
				470
				471	if (e && e->type->ops.mq.insert_requests)
				472	e->type->ops.mq.insert_requests(hctx, list, false);
				473	else
				474	blk_mq_insert_requests(hctx, ctx, list);
				475
				476	blk_mq_run_hw_queue(hctx, run_queue_async);
				477	}
				478
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	479	static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				480	struct blk_mq_hw_ctx *hctx,
				481	unsigned int hctx_idx)
				482	{
				483	if (hctx->sched_tags) {
				484	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
				485	blk_mq_free_rq_map(hctx->sched_tags);
				486	hctx->sched_tags = NULL;
				487	}
				488	}
				489
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	490	static int blk_mq_sched_alloc_tags(struct request_queue *q,
				491	struct blk_mq_hw_ctx *hctx,
				492	unsigned int hctx_idx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	493	{
				494	struct blk_mq_tag_set *set = q->tag_set;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	495	int ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	496
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	497	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
				498	set->reserved_tags);
				499	if (!hctx->sched_tags)
				500	return -ENOMEM;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	501
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	502	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
				503	if (ret)
				504	blk_mq_sched_free_tags(set, hctx, hctx_idx);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	505
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	506	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	507	}
				508
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	509	static void blk_mq_sched_tags_teardown(struct request_queue *q)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	510	{
				511	struct blk_mq_tag_set *set = q->tag_set;
				512	struct blk_mq_hw_ctx *hctx;
				513	int i;
				514
				515	queue_for_each_hw_ctx(q, hctx, i)
				516	blk_mq_sched_free_tags(set, hctx, i);
				517	}
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	518
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	519	int blk_mq_sched_init_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				520	unsigned int hctx_idx)
				521	{
				522	struct elevator_queue *e = q->elevator;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	523	int ret;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	524
				525	if (!e)
				526	return 0;
				527
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	528	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
				529	if (ret)
				530	return ret;
				531
				532	if (e->type->ops.mq.init_hctx) {
				533	ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
				534	if (ret) {
				535	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				536	return ret;
				537	}
				538	}
				539
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	540	blk_mq_debugfs_register_sched_hctx(q, hctx);
				541
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	542	return 0;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	543	}
				544
				545	void blk_mq_sched_exit_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				546	unsigned int hctx_idx)
				547	{
				548	struct elevator_queue *e = q->elevator;
				549
				550	if (!e)
				551	return;
				552
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	553	blk_mq_debugfs_unregister_sched_hctx(hctx);
				554
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	555	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				556	e->type->ops.mq.exit_hctx(hctx, hctx_idx);
				557	hctx->sched_data = NULL;
				558	}
				559
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	560	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				561	}
				562
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	563	int blk_mq_init_sched(struct request_queue q, struct elevator_type e)
				564	{
				565	struct blk_mq_hw_ctx *hctx;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	566	struct elevator_queue *eq;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	567	unsigned int i;
				568	int ret;
				569
				570	if (!e) {
				571	q->elevator = NULL;
				572	return 0;
				573	}
				574
				575	/*
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	576	* Default to double of smaller one between hw queue_depth and 128,
				577	* since we don't split into sync/async like the old code did.
				578	* Additionally, this is a per-hw queue depth.
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	579	*/
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	580	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				581	BLKDEV_MAX_RQ);
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	582
				583	queue_for_each_hw_ctx(q, hctx, i) {
				584	ret = blk_mq_sched_alloc_tags(q, hctx, i);
				585	if (ret)
				586	goto err;
				587	}
				588
				589	ret = e->ops.mq.init_sched(q, e);
				590	if (ret)
				591	goto err;
				592
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	593	blk_mq_debugfs_register_sched(q);
				594
				595	queue_for_each_hw_ctx(q, hctx, i) {
				596	if (e->ops.mq.init_hctx) {
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	597	ret = e->ops.mq.init_hctx(hctx, i);
				598	if (ret) {
				599	eq = q->elevator;
				600	blk_mq_exit_sched(q, eq);
				601	kobject_put(&eq->kobj);
				602	return ret;
				603	}
				604	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	605	blk_mq_debugfs_register_sched_hctx(q, hctx);
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	606	}
				607
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	608	return 0;
				609
				610	err:
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	611	blk_mq_sched_tags_teardown(q);
				612	q->elevator = NULL;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	613	return ret;
				614	}
				615
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	616	void blk_mq_exit_sched(struct request_queue q, struct elevator_queue e)
				617	{
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	618	struct blk_mq_hw_ctx *hctx;
				619	unsigned int i;
				620
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	621	queue_for_each_hw_ctx(q, hctx, i) {
				622	blk_mq_debugfs_unregister_sched_hctx(hctx);
				623	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				624	e->type->ops.mq.exit_hctx(hctx, i);
				625	hctx->sched_data = NULL;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	626	}
				627	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	628	blk_mq_debugfs_unregister_sched(q);
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	629	if (e->type->ops.mq.exit_sched)
				630	e->type->ops.mq.exit_sched(e);
				631	blk_mq_sched_tags_teardown(q);
				632	q->elevator = NULL;
				633	}
				634
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	635	int blk_mq_sched_init(struct request_queue *q)
				636	{
				637	int ret;
				638
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	639	mutex_lock(&q->sysfs_lock);
				640	ret = elevator_init(q, NULL);
				641	mutex_unlock(&q->sysfs_lock);
				642
				643	return ret;
				644	}