/*
 * blk-mq scheduling framework
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"

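/*
 * Free the per-hctx scheduler data for every hardware queue on this
 * request queue, calling the elevator's optional exit hook first.
 */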
void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (exit && hctx->sched_data)
			exit(hctx);
		kfree(hctx->sched_data);
		hctx->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);

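/*
 * Allocate @size bytes of per-hctx scheduler data for every hardware
 * queue and run the elevator's optional init hook. On failure, anything
 * allocated so far is released again via blk_mq_sched_free_hctx_data().
 */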
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
				int (*init)(struct blk_mq_hw_ctx *),
				void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hctx;
	int ret;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
		if (!hctx->sched_data) {
			ret = -ENOMEM;
			goto error;
		}

		if (init) {
			ret = init(hctx);
			if (ret) {
				/*
				 * We don't want to give exit() a partially
				 * initialized sched_data. init() must clean up
				 * if it fails.
				 */
				kfree(hctx->sched_data);
				hctx->sched_data = NULL;
				goto error;
			}
		}
	}

	return 0;
error:
	blk_mq_sched_free_hctx_data(q, exit);
	return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);

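/*
 * Look up (or create) the io_cq for this io_context/queue pair and attach
 * it to the request. If the elevator's get_rq_priv hook accepts the
 * request, mark it RQF_ELVPRIV and take a reference on the io_context;
 * otherwise drop the icq association again.
 */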
static void __blk_mq_sched_assign_ioc(struct request_queue *q,
				      struct request *rq,
				      struct bio *bio,
				      struct io_context *ioc)
{
	struct io_cq *icq;

	spin_lock_irq(q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!icq) {
		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
		if (!icq)
			return;
	}

	rq->elv.icq = icq;
	if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
		rq->rq_flags |= RQF_ELVPRIV;
		get_io_context(icq->ioc);
		return;
	}

	rq->elv.icq = NULL;
}

static void blk_mq_sched_assign_ioc(struct request_queue *q,
				    struct request *rq, struct bio *bio)
{
	struct io_context *ioc;

	ioc = rq_ioc(bio);
	if (ioc)
		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
}

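/*
 * Allocate a request for @bio. With an elevator attached, non-flush
 * requests come from the scheduler tags (BLK_MQ_REQ_INTERNAL), optionally
 * through the elevator's own get_request hook; without one, the request
 * is allocated straight from the regular tag set.
 */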
struct request *blk_mq_sched_get_request(struct request_queue *q,
					 struct bio *bio,
					 unsigned int op,
					 struct blk_mq_alloc_data *data)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;
	struct request *rq;

	blk_queue_enter_live(q);
	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, ctx->cpu);

	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);

	if (e) {
		data->flags |= BLK_MQ_REQ_INTERNAL;

		/*
		 * Flush requests are special and go directly to the
		 * dispatch list.
		 */
		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
			rq = e->type->ops.mq.get_request(q, op, data);
			if (rq)
				rq->rq_flags |= RQF_QUEUED;
		} else
			rq = __blk_mq_alloc_request(data, op);
	} else {
		rq = __blk_mq_alloc_request(data, op);
		if (rq)
			data->hctx->tags->rqs[rq->tag] = rq;
	}

	if (rq) {
		if (!op_is_flush(op)) {
			rq->elv.icq = NULL;
			if (e && e->type->icq_cache)
				blk_mq_sched_assign_ioc(q, rq, bio);
		}
		data->hctx->queued++;
		return rq;
	}

	blk_queue_exit(q);
	return NULL;
}

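/*
 * Release a request: drop the elevator private data and io_context
 * reference if they were set up, then hand the request back to the
 * elevator's put_request hook or to blk_mq_finish_request().
 */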
void blk_mq_sched_put_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (rq->rq_flags & RQF_ELVPRIV) {
		blk_mq_sched_put_rq_priv(rq->q, rq);
		if (rq->elv.icq) {
			put_io_context(rq->elv.icq->ioc);
			rq->elv.icq = NULL;
		}
	}

	if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
		e->type->ops.mq.put_request(rq);
	else
		blk_mq_finish_request(rq);
}

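/*
 * Run the hardware queue: dispatch leftover requests from hctx->dispatch
 * first for fairness; fresh requests are pulled from the software queues
 * or from the elevator's dispatch_request hook only when the dispatch
 * list was empty or did not make progress.
 */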
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct elevator_queue *e = hctx->queue->elevator;
	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
	bool did_work = false;
	LIST_HEAD(rq_list);

	if (unlikely(blk_mq_hctx_stopped(hctx)))
		return;

	hctx->run++;

	/*
	 * If we have previous entries on our dispatch list, grab them first for
	 * more fair dispatch.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Only ask the scheduler for requests if we didn't have residual
	 * requests from the dispatch list. This is to avoid the case where
	 * we only ever dispatch a fraction of the requests available because
	 * of low device queue depth. Once we pull requests out of the IO
	 * scheduler, we can no longer merge or sort them. So it's best to
	 * leave them there for as long as we can. Mark the hw queue as
	 * needing a restart in that case.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart(hctx);
		did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
	} else if (!has_sched_dispatch) {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(hctx, &rq_list);
	}

	/*
	 * We want to dispatch from the scheduler if we had no work left
	 * on the dispatch list, OR if we did have work but weren't able
	 * to make progress.
	 */
	if (!did_work && has_sched_dispatch) {
		do {
			struct request *rq;

			rq = e->type->ops.mq.dispatch_request(hctx);
			if (!rq)
				break;
			list_add(&rq->queuelist, &rq_list);
		} while (blk_mq_dispatch_rq_list(hctx, &rq_list));
	}
}

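/*
 * Repeatedly call @get_rq and move every request it returns to the tail
 * of @rq_list, until the callback runs dry.
 */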
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
				   struct list_head *rq_list,
				   struct request *(*get_rq)(struct blk_mq_hw_ctx *))
{
	do {
		struct request *rq;

		rq = get_rq(hctx);
		if (!rq)
			break;

		list_add_tail(&rq->queuelist, rq_list);
	} while (1);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);

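/*
 * Try to merge @bio into an existing request. If the bio is merged, also
 * attempt to merge the request with an adjacent one; a request made
 * redundant by that second merge is handed back through @merged_request
 * for the caller to free. Returns true if the bio was merged.
 */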
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
			    struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_back_merge(q, rq, bio))
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (!bio_attempt_front_merge(q, rq, bio))
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);

bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.mq.bio_merge) {
		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
		struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

		blk_mq_put_ctx(ctx);
		return e->type->ops.mq.bio_merge(hctx, bio);
	}

	return false;
}

bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);

void blk_mq_sched_request_inserted(struct request *rq)
{
	trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);

static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	if (rq->tag == -1) {
		rq->rq_flags |= RQF_SORTED;
		return false;
	}

	/*
	 * If we already have a real request tag, send directly to
	 * the dispatch list.
	 */
	spin_lock(&hctx->lock);
	list_add(&rq->queuelist, &hctx->dispatch);
	spin_unlock(&hctx->lock);
	return true;
}

static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
		if (blk_mq_hctx_has_pending(hctx))
			blk_mq_run_hw_queue(hctx, true);
	}
}

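/*
 * Restart queues that were marked as needing a restart. With a shared
 * tag set, a completion on one queue may free a tag another queue is
 * waiting for, so every hardware queue of the request queue is checked;
 * otherwise only @hctx itself is.
 */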
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
{
	unsigned int i;

	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
		blk_mq_sched_restart_hctx(hctx);
	else {
		struct request_queue *q = hctx->queue;

		if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
			return;

		clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);

		queue_for_each_hw_ctx(q, hctx, i)
			blk_mq_sched_restart_hctx(hctx);
	}
}

/*
 * Add flush/fua to the queue. If we fail getting a driver tag, then
 * punt to the requeue list. Requeue will re-invoke us from a context
 * that's safe to block from.
 */
static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				      struct request *rq, bool can_block)
{
	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
		blk_insert_flush(rq);
		blk_mq_run_hw_queue(hctx, true);
	} else
		blk_mq_add_to_requeue_list(rq, false, true);
}

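/*
 * Insert a single request. Flush/FUA requests and requests that already
 * carry a driver tag bypass the elevator; everything else goes through
 * the elevator's insert_requests hook or onto the software queue.
 */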
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
		blk_mq_sched_insert_flush(hctx, rq, can_block);
		return;
	}

	if (e && blk_mq_sched_bypass_insert(hctx, rq))
		goto run;

	if (e && e->type->ops.mq.insert_requests) {
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	} else {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	}

run:
	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}

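/*
 * Insert a list of requests (e.g. from a plug list) for @ctx, either
 * through the elevator's insert_requests hook or directly onto the
 * software queue, then kick the hardware queue.
 */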
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (e) {
		struct request *rq, *next;

		/*
		 * We bypass requests that already have a driver tag assigned,
		 * which should only be flushes. Flushes are only ever inserted
		 * as single requests, so we shouldn't ever hit the
		 * WARN_ON_ONCE() below (but let's handle it just in case).
		 */
		list_for_each_entry_safe(rq, next, list, queuelist) {
			if (WARN_ON_ONCE(rq->tag != -1)) {
				list_del_init(&rq->queuelist);
				blk_mq_sched_bypass_insert(hctx, rq);
			}
		}
	}

	if (e && e->type->ops.mq.insert_requests)
		e->type->ops.mq.insert_requests(hctx, list, false);
	else
		blk_mq_insert_requests(hctx, ctx, list);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}

static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
}

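/*
 * Prepare the queue for running with an I/O scheduler: pick a per-hctx
 * queue depth and allocate scheduler tag maps and requests for every
 * hardware queue, rolling back all allocations on failure.
 */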
int blk_mq_sched_setup(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int ret, i;

	/*
	 * Default to 256, since we don't split into sync/async like the
	 * old code did. Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * BLKDEV_MAX_RQ;

	/*
	 * We're switching to using an IO scheduler, so setup the hctx
	 * scheduler tags and switch the request map from the regular
	 * tags to scheduler tags. First allocate what we need, so we
	 * can safely fail and fallback, if needed.
	 */
	ret = 0;
	queue_for_each_hw_ctx(q, hctx, i) {
		hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
		if (!hctx->sched_tags) {
			ret = -ENOMEM;
			break;
		}
		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
		if (ret)
			break;
	}

	/*
	 * If we failed, free what we did allocate
	 */
	if (ret) {
		queue_for_each_hw_ctx(q, hctx, i) {
			if (!hctx->sched_tags)
				continue;
			blk_mq_sched_free_tags(set, hctx, i);
		}

		return ret;
	}

	return 0;
}

void blk_mq_sched_teardown(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_free_tags(set, hctx, i);
}

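/*
 * Pick and initialize the default elevator for a new queue, honouring the
 * CONFIG_DEFAULT_SQ_NONE/CONFIG_DEFAULT_MQ_NONE selections for single-
 * and multi-queue devices.
 */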
int blk_mq_sched_init(struct request_queue *q)
{
	int ret;

#if defined(CONFIG_DEFAULT_SQ_NONE)
	if (q->nr_hw_queues == 1)
		return 0;
#endif
#if defined(CONFIG_DEFAULT_MQ_NONE)
	if (q->nr_hw_queues > 1)
		return 0;
#endif

	mutex_lock(&q->sysfs_lock);
	ret = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}