Blame - block/blk-mq-sched.c - kernel/msm-4.19

blob: c117bd8fd1f6126849472300b3c294e65825a2f7 [file] [log] [blame]

Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	1	/*
				2	* blk-mq scheduling framework
				3	*
				4	* Copyright (C) 2016 Jens Axboe
				5	*/
				6	#include <linux/kernel.h>
				7	#include <linux/module.h>
				8	#include <linux/blk-mq.h>
				9
				10	#include <trace/events/block.h>
				11
				12	#include "blk.h"
				13	#include "blk-mq.h"
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	14	#include "blk-mq-debugfs.h"
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	15	#include "blk-mq-sched.h"
				16	#include "blk-mq-tag.h"
				17	#include "blk-wbt.h"
				18
				19	void blk_mq_sched_free_hctx_data(struct request_queue *q,
				20	void (exit)(struct blk_mq_hw_ctx ))
				21	{
				22	struct blk_mq_hw_ctx *hctx;
				23	int i;
				24
				25	queue_for_each_hw_ctx(q, hctx, i) {
				26	if (exit && hctx->sched_data)
				27	exit(hctx);
				28	kfree(hctx->sched_data);
				29	hctx->sched_data = NULL;
				30	}
				31	}
				32	EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
				33
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	34	void blk_mq_sched_assign_ioc(struct request rq, struct bio bio)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	35	{
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	36	struct request_queue *q = rq->q;
				37	struct io_context *ioc = rq_ioc(bio);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	38	struct io_cq *icq;
				39
				40	spin_lock_irq(q->queue_lock);
				41	icq = ioc_lookup_icq(ioc, q);
				42	spin_unlock_irq(q->queue_lock);
				43
				44	if (!icq) {
				45	icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
				46	if (!icq)
				47	return;
				48	}
Christoph Hellwig	ea511e3	2017-06-16 18:15:20 +0200	[diff] [blame]	49	get_io_context(icq->ioc);
Christoph Hellwig	44e8c2b	2017-06-16 18:15:25 +0200	[diff] [blame]	50	rq->elv.icq = icq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	51	}
				52
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	53	/*
				54	* Mark a hardware queue as needing a restart. For shared queues, maintain
				55	* a count of how many hardware queues are marked for restart.
				56	*/
				57	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
				58	{
				59	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				60	return;
				61
				62	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				63	struct request_queue *q = hctx->queue;
				64
				65	if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				66	atomic_inc(&q->shared_hctx_restart);
				67	} else
				68	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
				69	}
				70
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	71	static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	72	{
				73	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	74	return false;
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	75
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	76	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
				77	struct request_queue *q = hctx->queue;
				78
				79	if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
				80	atomic_dec(&q->shared_hctx_restart);
				81	} else
				82	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	83
Jens Axboe	79f720a	2017-11-10 09:13:21 -0700	[diff] [blame]	84	return blk_mq_run_hw_queue(hctx, true);
Jens Axboe	8e8320c	2017-06-20 17:56:13 -0600	[diff] [blame]	85	}
				86
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	87	/*
				88	* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
				89	* its queue by itself in its completion handler, so we don't need to
				90	* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
				91	*/
				92	static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	93	{
				94	struct request_queue *q = hctx->queue;
				95	struct elevator_queue *e = q->elevator;
				96	LIST_HEAD(rq_list);
				97
				98	do {
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	99	struct request *rq;
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	100
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	101	if (e->type->ops.mq.has_work &&
				102	!e->type->ops.mq.has_work(hctx))
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	103	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	104
Ming Lei	88022d7	2017-11-05 02:21:12 +0800	[diff] [blame]	105	if (!blk_mq_get_dispatch_budget(hctx))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	106	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	107
				108	rq = e->type->ops.mq.dispatch_request(hctx);
				109	if (!rq) {
				110	blk_mq_put_dispatch_budget(hctx);
				111	break;
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	112	}
				113
				114	/*
				115	* Now this rq owns the budget which has to be released
				116	* if this rq won't be queued to driver via .queue_rq()
				117	* in blk_mq_dispatch_rq_list().
				118	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	119	list_add(&rq->queuelist, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	120	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	121	}
				122
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	123	static struct blk_mq_ctx blk_mq_next_ctx(struct blk_mq_hw_ctx hctx,
				124	struct blk_mq_ctx *ctx)
				125	{
				126	unsigned idx = ctx->index_hw;
				127
				128	if (++idx == hctx->nr_ctx)
				129	idx = 0;
				130
				131	return hctx->ctxs[idx];
				132	}
				133
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	134	/*
				135	* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
				136	* its queue by itself in its completion handler, so we don't need to
				137	* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
				138	*/
				139	static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	140	{
				141	struct request_queue *q = hctx->queue;
				142	LIST_HEAD(rq_list);
				143	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
				144
				145	do {
				146	struct request *rq;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	147
				148	if (!sbitmap_any_bit_set(&hctx->ctx_map))
				149	break;
				150
Ming Lei	88022d7	2017-11-05 02:21:12 +0800	[diff] [blame]	151	if (!blk_mq_get_dispatch_budget(hctx))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	152	break;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	153
				154	rq = blk_mq_dequeue_from_ctx(hctx, ctx);
				155	if (!rq) {
				156	blk_mq_put_dispatch_budget(hctx);
				157	break;
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	158	}
				159
				160	/*
				161	* Now this rq owns the budget which has to be released
				162	* if this rq won't be queued to driver via .queue_rq()
				163	* in blk_mq_dispatch_rq_list().
				164	*/
				165	list_add(&rq->queuelist, &rq_list);
				166
				167	/* round robin for fair dispatch */
				168	ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
				169
				170	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
				171
				172	WRITE_ONCE(hctx->dispatch_from, ctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	173	}
				174
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	175	/* return true if hw queue need to be run again */
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	176	void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	177	{
Omar Sandoval	81380ca	2017-04-07 08:56:26 -0600	[diff] [blame]	178	struct request_queue *q = hctx->queue;
				179	struct elevator_queue *e = q->elevator;
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	180	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	181	LIST_HEAD(rq_list);
				182
Ming Lei	f4560ff	2017-06-18 14:24:27 -0600	[diff] [blame]	183	/* RCU or SRCU read lock is needed before checking quiesced flag */
				184	if (unlikely(blk_mq_hctx_stopped(hctx) \|\| blk_queue_quiesced(q)))
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	185	return;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	186
				187	hctx->run++;
				188
				189	/*
				190	* If we have previous entries on our dispatch list, grab them first for
				191	* more fair dispatch.
				192	*/
				193	if (!list_empty_careful(&hctx->dispatch)) {
				194	spin_lock(&hctx->lock);
				195	if (!list_empty(&hctx->dispatch))
				196	list_splice_init(&hctx->dispatch, &rq_list);
				197	spin_unlock(&hctx->lock);
				198	}
				199
				200	/*
				201	* Only ask the scheduler for requests, if we didn't have residual
				202	* requests from the dispatch list. This is to avoid the case where
				203	* we only ever dispatch a fraction of the requests available because
				204	* of low device queue depth. Once we pull requests out of the IO
				205	* scheduler, we can no longer merge or sort them. So it's best to
				206	* leave them there for as long as we can. Mark the hw queue as
				207	* needing a restart in that case.
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	208	*
Ming Lei	5e3d02b	2017-10-14 17:22:25 +0800	[diff] [blame]	209	* We want to dispatch from the scheduler if there was nothing
				210	* on the dispatch list or we were able to dispatch from the
				211	* dispatch list.
Jens Axboe	64765a7	2017-02-17 11:39:26 -0700	[diff] [blame]	212	*/
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	213	if (!list_empty(&rq_list)) {
				214	blk_mq_sched_mark_restart_hctx(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	215	if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
				216	if (has_sched_dispatch)
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	217	blk_mq_do_dispatch_sched(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	218	else
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	219	blk_mq_do_dispatch_ctx(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	220	}
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	221	} else if (has_sched_dispatch) {
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	222	blk_mq_do_dispatch_sched(hctx);
Ming Lei	b347689	2017-10-14 17:22:30 +0800	[diff] [blame]	223	} else if (q->mq_ops->get_budget) {
				224	/*
				225	* If we need to get budget before queuing request, we
				226	* dequeue request one by one from sw queue for avoiding
				227	* to mess up I/O merge when dispatch runs out of resource.
				228	*
				229	* TODO: get more budgets, and dequeue more requests in
				230	* one time.
				231	*/
Ming Lei	1f460b6	2017-10-27 12:43:30 +0800	[diff] [blame]	232	blk_mq_do_dispatch_ctx(hctx);
Ming Lei	caf8eb0	2017-10-14 17:22:26 +0800	[diff] [blame]	233	} else {
				234	blk_mq_flush_busy_ctxs(hctx, &rq_list);
Ming Lei	de14829	2017-10-14 17:22:29 +0800	[diff] [blame]	235	blk_mq_dispatch_rq_list(q, &rq_list, false);
Jens Axboe	c13660a	2017-01-26 12:40:07 -0700	[diff] [blame]	236	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	237	}
				238
Jens Axboe	e4d750c	2017-02-03 09:48:28 -0700	[diff] [blame]	239	bool blk_mq_sched_try_merge(struct request_queue q, struct bio bio,
				240	struct request **merged_request)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	241	{
				242	struct request *rq;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	243
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	244	switch (elv_merge(q, &rq, bio)) {
				245	case ELEVATOR_BACK_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	246	if (!blk_mq_sched_allow_merge(q, rq, bio))
				247	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	248	if (!bio_attempt_back_merge(q, rq, bio))
				249	return false;
				250	*merged_request = attempt_back_merge(q, rq);
				251	if (!*merged_request)
				252	elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
				253	return true;
				254	case ELEVATOR_FRONT_MERGE:
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	255	if (!blk_mq_sched_allow_merge(q, rq, bio))
				256	return false;
Christoph Hellwig	34fe7c0	2017-02-08 14:46:48 +0100	[diff] [blame]	257	if (!bio_attempt_front_merge(q, rq, bio))
				258	return false;
				259	*merged_request = attempt_front_merge(q, rq);
				260	if (!*merged_request)
				261	elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
				262	return true;
				263	default:
				264	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	265	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	266	}
				267	EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
				268
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	269	/*
				270	* Reverse check our software queue for entries that we could potentially
				271	* merge with. Currently includes a hand-wavy stop count of 8, to not spend
				272	* too much time checking for merges.
				273	*/
				274	static bool blk_mq_attempt_merge(struct request_queue *q,
				275	struct blk_mq_ctx ctx, struct bio bio)
				276	{
				277	struct request *rq;
				278	int checked = 8;
				279
Bart Van Assche	7b60781	2017-06-20 11:15:47 -0700	[diff] [blame]	280	lockdep_assert_held(&ctx->lock);
				281
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	282	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
				283	bool merged = false;
				284
				285	if (!checked--)
				286	break;
				287
				288	if (!blk_rq_merge_ok(rq, bio))
				289	continue;
				290
				291	switch (blk_try_merge(rq, bio)) {
				292	case ELEVATOR_BACK_MERGE:
				293	if (blk_mq_sched_allow_merge(q, rq, bio))
				294	merged = bio_attempt_back_merge(q, rq, bio);
				295	break;
				296	case ELEVATOR_FRONT_MERGE:
				297	if (blk_mq_sched_allow_merge(q, rq, bio))
				298	merged = bio_attempt_front_merge(q, rq, bio);
				299	break;
				300	case ELEVATOR_DISCARD_MERGE:
				301	merged = bio_attempt_discard_merge(q, rq, bio);
				302	break;
				303	default:
				304	continue;
				305	}
				306
				307	if (merged)
				308	ctx->rq_merged++;
				309	return merged;
				310	}
				311
				312	return false;
				313	}
				314
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	315	bool __blk_mq_sched_bio_merge(struct request_queue q, struct bio bio)
				316	{
				317	struct elevator_queue *e = q->elevator;
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	318	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
				319	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				320	bool ret = false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	321
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	322	if (e && e->type->ops.mq.bio_merge) {
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	323	blk_mq_put_ctx(ctx);
				324	return e->type->ops.mq.bio_merge(hctx, bio);
				325	}
				326
Ming Lei	9bddeb2	2017-05-26 19:53:20 +0800	[diff] [blame]	327	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
				328	/* default per sw-queue merge */
				329	spin_lock(&ctx->lock);
				330	ret = blk_mq_attempt_merge(q, ctx, bio);
				331	spin_unlock(&ctx->lock);
				332	}
				333
				334	blk_mq_put_ctx(ctx);
				335	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	336	}
				337
				338	bool blk_mq_sched_try_insert_merge(struct request_queue q, struct request rq)
				339	{
				340	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
				341	}
				342	EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
				343
				344	void blk_mq_sched_request_inserted(struct request *rq)
				345	{
				346	trace_block_rq_insert(rq->q, rq);
				347	}
				348	EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
				349
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	350	static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	351	bool has_sched,
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	352	struct request *rq)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	353	{
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	354	/* dispatch flush rq directly */
				355	if (rq->rq_flags & RQF_FLUSH_SEQ) {
				356	spin_lock(&hctx->lock);
				357	list_add(&rq->queuelist, &hctx->dispatch);
				358	spin_unlock(&hctx->lock);
				359	return true;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	360	}
				361
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	362	if (has_sched)
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	363	rq->rq_flags \|= RQF_SORTED;
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	364
				365	return false;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	366	}
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	367
Jens Axboe	05b7941	2017-11-08 10:38:29 -0700	[diff] [blame]	368	/**
				369	* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
				370	* @pos: loop cursor.
				371	* @skip: the list element that will not be examined. Iteration starts at
				372	* @skip->next.
				373	* @head: head of the list to examine. This list must have at least one
				374	* element, namely @skip.
				375	* @member: name of the list_head structure within typeof(*pos).
				376	*/
				377	#define list_for_each_entry_rcu_rr(pos, skip, head, member) \
				378	for ((pos) = (skip); \
				379	(pos = (pos)->member.next != (head) ? list_entry_rcu( \
				380	(pos)->member.next, typeof(*pos), member) : \
				381	list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
				382	(pos) != (skip); )
				383
				384	/*
				385	* Called after a driver tag has been freed to check whether a hctx needs to
				386	* be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
				387	* queues in a round-robin fashion if the tag set of @hctx is shared with other
				388	* hardware queues.
				389	*/
				390	void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
				391	{
				392	struct blk_mq_tags *const tags = hctx->tags;
				393	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
				394	struct request_queue const queue = hctx->queue, q;
				395	struct blk_mq_hw_ctx *hctx2;
				396	unsigned int i, j;
				397
				398	if (set->flags & BLK_MQ_F_TAG_SHARED) {
				399	/*
				400	* If this is 0, then we know that no hardware queues
				401	* have RESTART marked. We're done.
				402	*/
				403	if (!atomic_read(&queue->shared_hctx_restart))
				404	return;
				405
				406	rcu_read_lock();
				407	list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
				408	tag_set_list) {
				409	queue_for_each_hw_ctx(q, hctx2, i)
				410	if (hctx2->tags == tags &&
				411	blk_mq_sched_restart_hctx(hctx2))
				412	goto done;
				413	}
				414	j = hctx->queue_num + 1;
				415	for (i = 0; i < queue->nr_hw_queues; i++, j++) {
				416	if (j == queue->nr_hw_queues)
				417	j = 0;
				418	hctx2 = queue->queue_hw_ctx[j];
				419	if (hctx2->tags == tags &&
				420	blk_mq_sched_restart_hctx(hctx2))
				421	break;
				422	}
				423	done:
				424	rcu_read_unlock();
				425	} else {
				426	blk_mq_sched_restart_hctx(hctx);
				427	}
				428	}
				429
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	430	void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				431	bool run_queue, bool async, bool can_block)
				432	{
				433	struct request_queue *q = rq->q;
				434	struct elevator_queue *e = q->elevator;
				435	struct blk_mq_ctx *ctx = rq->mq_ctx;
				436	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				437
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	438	/* flush rq in flush machinery need to be dispatched directly */
				439	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	440	blk_insert_flush(rq);
				441	goto run;
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	442	}
				443
Ming Lei	923218f	2017-11-02 23:24:38 +0800	[diff] [blame]	444	WARN_ON(e && (rq->tag != -1));
				445
Ming Lei	a6a252e	2017-11-02 23:24:36 +0800	[diff] [blame]	446	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	447	goto run;
				448
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	449	if (e && e->type->ops.mq.insert_requests) {
				450	LIST_HEAD(list);
				451
				452	list_add(&rq->queuelist, &list);
				453	e->type->ops.mq.insert_requests(hctx, &list, at_head);
				454	} else {
				455	spin_lock(&ctx->lock);
				456	__blk_mq_insert_request(hctx, rq, at_head);
				457	spin_unlock(&ctx->lock);
				458	}
				459
Omar Sandoval	0cacba6	2017-02-02 15:42:39 -0800	[diff] [blame]	460	run:
Jens Axboe	bd6737f	2017-01-27 01:00:47 -0700	[diff] [blame]	461	if (run_queue)
				462	blk_mq_run_hw_queue(hctx, async);
				463	}
				464
				465	void blk_mq_sched_insert_requests(struct request_queue *q,
				466	struct blk_mq_ctx *ctx,
				467	struct list_head *list, bool run_queue_async)
				468	{
				469	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
				470	struct elevator_queue *e = hctx->queue->elevator;
				471
				472	if (e && e->type->ops.mq.insert_requests)
				473	e->type->ops.mq.insert_requests(hctx, list, false);
				474	else
				475	blk_mq_insert_requests(hctx, ctx, list);
				476
				477	blk_mq_run_hw_queue(hctx, run_queue_async);
				478	}
				479
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	480	static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				481	struct blk_mq_hw_ctx *hctx,
				482	unsigned int hctx_idx)
				483	{
				484	if (hctx->sched_tags) {
				485	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
				486	blk_mq_free_rq_map(hctx->sched_tags);
				487	hctx->sched_tags = NULL;
				488	}
				489	}
				490
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	491	static int blk_mq_sched_alloc_tags(struct request_queue *q,
				492	struct blk_mq_hw_ctx *hctx,
				493	unsigned int hctx_idx)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	494	{
				495	struct blk_mq_tag_set *set = q->tag_set;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	496	int ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	497
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	498	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
				499	set->reserved_tags);
				500	if (!hctx->sched_tags)
				501	return -ENOMEM;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	502
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	503	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
				504	if (ret)
				505	blk_mq_sched_free_tags(set, hctx, hctx_idx);
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	506
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	507	return ret;
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	508	}
				509
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	510	static void blk_mq_sched_tags_teardown(struct request_queue *q)
Jens Axboe	bd166ef	2017-01-17 06:03:22 -0700	[diff] [blame]	511	{
				512	struct blk_mq_tag_set *set = q->tag_set;
				513	struct blk_mq_hw_ctx *hctx;
				514	int i;
				515
				516	queue_for_each_hw_ctx(q, hctx, i)
				517	blk_mq_sched_free_tags(set, hctx, i);
				518	}
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	519
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	520	int blk_mq_sched_init_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				521	unsigned int hctx_idx)
				522	{
				523	struct elevator_queue *e = q->elevator;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	524	int ret;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	525
				526	if (!e)
				527	return 0;
				528
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	529	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
				530	if (ret)
				531	return ret;
				532
				533	if (e->type->ops.mq.init_hctx) {
				534	ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
				535	if (ret) {
				536	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				537	return ret;
				538	}
				539	}
				540
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	541	blk_mq_debugfs_register_sched_hctx(q, hctx);
				542
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	543	return 0;
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	544	}
				545
				546	void blk_mq_sched_exit_hctx(struct request_queue q, struct blk_mq_hw_ctx hctx,
				547	unsigned int hctx_idx)
				548	{
				549	struct elevator_queue *e = q->elevator;
				550
				551	if (!e)
				552	return;
				553
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	554	blk_mq_debugfs_unregister_sched_hctx(hctx);
				555
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	556	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				557	e->type->ops.mq.exit_hctx(hctx, hctx_idx);
				558	hctx->sched_data = NULL;
				559	}
				560
Omar Sandoval	9325263	2017-04-05 12:01:31 -0700	[diff] [blame]	561	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
				562	}
				563
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	564	int blk_mq_init_sched(struct request_queue q, struct elevator_type e)
				565	{
				566	struct blk_mq_hw_ctx *hctx;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	567	struct elevator_queue *eq;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	568	unsigned int i;
				569	int ret;
				570
				571	if (!e) {
				572	q->elevator = NULL;
				573	return 0;
				574	}
				575
				576	/*
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	577	* Default to double of smaller one between hw queue_depth and 128,
				578	* since we don't split into sync/async like the old code did.
				579	* Additionally, this is a per-hw queue depth.
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	580	*/
Ming Lei	32825c4	2017-07-03 20:37:14 +0800	[diff] [blame]	581	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				582	BLKDEV_MAX_RQ);
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	583
				584	queue_for_each_hw_ctx(q, hctx, i) {
				585	ret = blk_mq_sched_alloc_tags(q, hctx, i);
				586	if (ret)
				587	goto err;
				588	}
				589
				590	ret = e->ops.mq.init_sched(q, e);
				591	if (ret)
				592	goto err;
				593
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	594	blk_mq_debugfs_register_sched(q);
				595
				596	queue_for_each_hw_ctx(q, hctx, i) {
				597	if (e->ops.mq.init_hctx) {
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	598	ret = e->ops.mq.init_hctx(hctx, i);
				599	if (ret) {
				600	eq = q->elevator;
				601	blk_mq_exit_sched(q, eq);
				602	kobject_put(&eq->kobj);
				603	return ret;
				604	}
				605	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	606	blk_mq_debugfs_register_sched_hctx(q, hctx);
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	607	}
				608
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	609	return 0;
				610
				611	err:
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	612	blk_mq_sched_tags_teardown(q);
				613	q->elevator = NULL;
Omar Sandoval	6917ff0	2017-04-05 12:01:30 -0700	[diff] [blame]	614	return ret;
				615	}
				616
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	617	void blk_mq_exit_sched(struct request_queue q, struct elevator_queue e)
				618	{
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	619	struct blk_mq_hw_ctx *hctx;
				620	unsigned int i;
				621
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	622	queue_for_each_hw_ctx(q, hctx, i) {
				623	blk_mq_debugfs_unregister_sched_hctx(hctx);
				624	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
				625	e->type->ops.mq.exit_hctx(hctx, i);
				626	hctx->sched_data = NULL;
Omar Sandoval	ee056f9	2017-04-05 12:01:34 -0700	[diff] [blame]	627	}
				628	}
Omar Sandoval	d332ce0	2017-05-04 08:24:40 -0600	[diff] [blame]	629	blk_mq_debugfs_unregister_sched(q);
Omar Sandoval	54d5329	2017-04-07 08:52:27 -0600	[diff] [blame]	630	if (e->type->ops.mq.exit_sched)
				631	e->type->ops.mq.exit_sched(e);
				632	blk_mq_sched_tags_teardown(q);
				633	q->elevator = NULL;
				634	}
				635
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	636	int blk_mq_sched_init(struct request_queue *q)
				637	{
				638	int ret;
				639
Jens Axboe	d348499	2017-01-13 14:43:58 -0700	[diff] [blame]	640	mutex_lock(&q->sysfs_lock);
				641	ret = elevator_init(q, NULL);
				642	mutex_unlock(&q->sysfs_lock);
				643
				644	return ret;
				645	}