Blame - block/kyber-iosched.c - kernel/msm-4.19

blob: b9faabc75fdb031b4f9d7917b1f91aa8e4ba2c06 [file] [log] [blame]

Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	1	/*
				2	* The Kyber I/O scheduler. Controls latency by throttling queue depths using
				3	* scalable techniques.
				4	*
				5	* Copyright (C) 2017 Facebook
				6	*
				7	* This program is free software; you can redistribute it and/or
				8	* modify it under the terms of the GNU General Public
				9	* License v2 as published by the Free Software Foundation.
				10	*
				11	* This program is distributed in the hope that it will be useful,
				12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	* General Public License for more details.
				15	*
				16	* You should have received a copy of the GNU General Public License
				17	* along with this program. If not, see <https://www.gnu.org/licenses/>.
				18	*/
				19
				20	#include <linux/kernel.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/blk-mq.h>
				23	#include <linux/elevator.h>
				24	#include <linux/module.h>
				25	#include <linux/sbitmap.h>
				26
				27	#include "blk.h"
				28	#include "blk-mq.h"
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	29	#include "blk-mq-debugfs.h"
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	30	#include "blk-mq-sched.h"
				31	#include "blk-mq-tag.h"
				32	#include "blk-stat.h"
				33
				34	/* Scheduling domains. */
				35	enum {
				36	KYBER_READ,
				37	KYBER_SYNC_WRITE,
				38	KYBER_OTHER, /* Async writes, discard, etc. */
				39	KYBER_NUM_DOMAINS,
				40	};
				41
				42	enum {
				43	KYBER_MIN_DEPTH = 256,
				44
				45	/*
				46	* In order to prevent starvation of synchronous requests by a flood of
				47	* asynchronous requests, we reserve 25% of requests for synchronous
				48	* operations.
				49	*/
				50	KYBER_ASYNC_PERCENT = 75,
				51	};
				52
				53	/*
				54	* Initial device-wide depths for each scheduling domain.
				55	*
				56	* Even for fast devices with lots of tags like NVMe, you can saturate
				57	* the device with only a fraction of the maximum possible queue depth.
				58	* So, we cap these to a reasonable value.
				59	*/
				60	static const unsigned int kyber_depth[] = {
				61	[KYBER_READ] = 256,
				62	[KYBER_SYNC_WRITE] = 128,
				63	[KYBER_OTHER] = 64,
				64	};
				65
				66	/*
				67	* Scheduling domain batch sizes. We favor reads.
				68	*/
				69	static const unsigned int kyber_batch_size[] = {
				70	[KYBER_READ] = 16,
				71	[KYBER_SYNC_WRITE] = 8,
				72	[KYBER_OTHER] = 8,
				73	};
				74
				75	struct kyber_queue_data {
				76	struct request_queue *q;
				77
				78	struct blk_stat_callback *cb;
				79
				80	/*
				81	* The device is divided into multiple scheduling domains based on the
				82	* request type. Each domain has a fixed number of in-flight requests of
				83	* that type device-wide, limited by these tokens.
				84	*/
				85	struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
				86
				87	/*
				88	* Async request percentage, converted to per-word depth for
				89	* sbitmap_get_shallow().
				90	*/
				91	unsigned int async_depth;
				92
				93	/* Target latencies in nanoseconds. */
				94	u64 read_lat_nsec, write_lat_nsec;
				95	};
				96
				97	struct kyber_hctx_data {
				98	spinlock_t lock;
				99	struct list_head rqs[KYBER_NUM_DOMAINS];
				100	unsigned int cur_domain;
				101	unsigned int batching;
				102	wait_queue_t domain_wait[KYBER_NUM_DOMAINS];
				103	atomic_t wait_index[KYBER_NUM_DOMAINS];
				104	};
				105
Stephen Bates	a37244e	2017-04-20 15:29:16 -0600	[diff] [blame]	106	static int rq_sched_domain(const struct request *rq)
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	107	{
				108	unsigned int op = rq->cmd_flags;
				109
				110	if ((op & REQ_OP_MASK) == REQ_OP_READ)
				111	return KYBER_READ;
				112	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
				113	return KYBER_SYNC_WRITE;
				114	else
				115	return KYBER_OTHER;
				116	}
				117
				118	enum {
				119	NONE = 0,
				120	GOOD = 1,
				121	GREAT = 2,
				122	BAD = -1,
				123	AWFUL = -2,
				124	};
				125
				126	#define IS_GOOD(status) ((status) > 0)
				127	#define IS_BAD(status) ((status) < 0)
				128
				129	static int kyber_lat_status(struct blk_stat_callback *cb,
				130	unsigned int sched_domain, u64 target)
				131	{
				132	u64 latency;
				133
				134	if (!cb->stat[sched_domain].nr_samples)
				135	return NONE;
				136
				137	latency = cb->stat[sched_domain].mean;
				138	if (latency >= 2 * target)
				139	return AWFUL;
				140	else if (latency > target)
				141	return BAD;
				142	else if (latency <= target / 2)
				143	return GREAT;
				144	else /* (latency <= target) */
				145	return GOOD;
				146	}
				147
				148	/*
				149	* Adjust the read or synchronous write depth given the status of reads and
				150	* writes. The goal is that the latencies of the two domains are fair (i.e., if
				151	* one is good, then the other is good).
				152	*/
				153	static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
				154	unsigned int sched_domain, int this_status,
				155	int other_status)
				156	{
				157	unsigned int orig_depth, depth;
				158
				159	/*
				160	* If this domain had no samples, or reads and writes are both good or
				161	* both bad, don't adjust the depth.
				162	*/
				163	if (this_status == NONE \|\|
				164	(IS_GOOD(this_status) && IS_GOOD(other_status)) \|\|
				165	(IS_BAD(this_status) && IS_BAD(other_status)))
				166	return;
				167
				168	orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
				169
				170	if (other_status == NONE) {
				171	depth++;
				172	} else {
				173	switch (this_status) {
				174	case GOOD:
				175	if (other_status == AWFUL)
				176	depth -= max(depth / 4, 1U);
				177	else
				178	depth -= max(depth / 8, 1U);
				179	break;
				180	case GREAT:
				181	if (other_status == AWFUL)
				182	depth /= 2;
				183	else
				184	depth -= max(depth / 4, 1U);
				185	break;
				186	case BAD:
				187	depth++;
				188	break;
				189	case AWFUL:
				190	if (other_status == GREAT)
				191	depth += 2;
				192	else
				193	depth++;
				194	break;
				195	}
				196	}
				197
				198	depth = clamp(depth, 1U, kyber_depth[sched_domain]);
				199	if (depth != orig_depth)
				200	sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
				201	}
				202
				203	/*
				204	* Adjust the depth of other requests given the status of reads and synchronous
				205	* writes. As long as either domain is doing fine, we don't throttle, but if
				206	* both domains are doing badly, we throttle heavily.
				207	*/
				208	static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
				209	int read_status, int write_status,
				210	bool have_samples)
				211	{
				212	unsigned int orig_depth, depth;
				213	int status;
				214
				215	orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
				216
				217	if (read_status == NONE && write_status == NONE) {
				218	depth += 2;
				219	} else if (have_samples) {
				220	if (read_status == NONE)
				221	status = write_status;
				222	else if (write_status == NONE)
				223	status = read_status;
				224	else
				225	status = max(read_status, write_status);
				226	switch (status) {
				227	case GREAT:
				228	depth += 2;
				229	break;
				230	case GOOD:
				231	depth++;
				232	break;
				233	case BAD:
				234	depth -= max(depth / 4, 1U);
				235	break;
				236	case AWFUL:
				237	depth /= 2;
				238	break;
				239	}
				240	}
				241
				242	depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
				243	if (depth != orig_depth)
				244	sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
				245	}
				246
				247	/*
				248	* Apply heuristics for limiting queue depths based on gathered latency
				249	* statistics.
				250	*/
				251	static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
				252	{
				253	struct kyber_queue_data *kqd = cb->data;
				254	int read_status, write_status;
				255
				256	read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
				257	write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
				258
				259	kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
				260	kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
				261	kyber_adjust_other_depth(kqd, read_status, write_status,
				262	cb->stat[KYBER_OTHER].nr_samples != 0);
				263
				264	/*
				265	* Continue monitoring latencies if we aren't hitting the targets or
				266	* we're still throttling other requests.
				267	*/
				268	if (!blk_stat_is_active(kqd->cb) &&
				269	((IS_BAD(read_status) \|\| IS_BAD(write_status) \|\|
				270	kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
				271	blk_stat_activate_msecs(kqd->cb, 100);
				272	}
				273
				274	static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
				275	{
				276	/*
				277	* All of the hardware queues have the same depth, so we can just grab
				278	* the shift of the first one.
				279	*/
				280	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
				281	}
				282
				283	static struct kyber_queue_data kyber_queue_data_alloc(struct request_queue q)
				284	{
				285	struct kyber_queue_data *kqd;
				286	unsigned int max_tokens;
				287	unsigned int shift;
				288	int ret = -ENOMEM;
				289	int i;
				290
				291	kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
				292	if (!kqd)
				293	goto err;
				294	kqd->q = q;
				295
				296	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
				297	KYBER_NUM_DOMAINS, kqd);
				298	if (!kqd->cb)
				299	goto err_kqd;
				300
				301	/*
				302	* The maximum number of tokens for any scheduling domain is at least
				303	* the queue depth of a single hardware queue. If the hardware doesn't
				304	* have many tags, still provide a reasonable number.
				305	*/
				306	max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
				307	KYBER_MIN_DEPTH);
				308	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				309	WARN_ON(!kyber_depth[i]);
				310	WARN_ON(!kyber_batch_size[i]);
				311	ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
				312	max_tokens, -1, false, GFP_KERNEL,
				313	q->node);
				314	if (ret) {
				315	while (--i >= 0)
				316	sbitmap_queue_free(&kqd->domain_tokens[i]);
				317	goto err_cb;
				318	}
				319	sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
				320	}
				321
				322	shift = kyber_sched_tags_shift(kqd);
				323	kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
				324
				325	kqd->read_lat_nsec = 2000000ULL;
				326	kqd->write_lat_nsec = 10000000ULL;
				327
				328	return kqd;
				329
				330	err_cb:
				331	blk_stat_free_callback(kqd->cb);
				332	err_kqd:
				333	kfree(kqd);
				334	err:
				335	return ERR_PTR(ret);
				336	}
				337
				338	static int kyber_init_sched(struct request_queue q, struct elevator_type e)
				339	{
				340	struct kyber_queue_data *kqd;
				341	struct elevator_queue *eq;
				342
				343	eq = elevator_alloc(q, e);
				344	if (!eq)
				345	return -ENOMEM;
				346
				347	kqd = kyber_queue_data_alloc(q);
				348	if (IS_ERR(kqd)) {
				349	kobject_put(&eq->kobj);
				350	return PTR_ERR(kqd);
				351	}
				352
				353	eq->elevator_data = kqd;
				354	q->elevator = eq;
				355
				356	blk_stat_add_callback(q, kqd->cb);
				357
				358	return 0;
				359	}
				360
				361	static void kyber_exit_sched(struct elevator_queue *e)
				362	{
				363	struct kyber_queue_data *kqd = e->elevator_data;
				364	struct request_queue *q = kqd->q;
				365	int i;
				366
				367	blk_stat_remove_callback(q, kqd->cb);
				368
				369	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
				370	sbitmap_queue_free(&kqd->domain_tokens[i]);
				371	blk_stat_free_callback(kqd->cb);
				372	kfree(kqd);
				373	}
				374
				375	static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				376	{
				377	struct kyber_hctx_data *khd;
				378	int i;
				379
				380	khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
				381	if (!khd)
				382	return -ENOMEM;
				383
				384	spin_lock_init(&khd->lock);
				385
				386	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				387	INIT_LIST_HEAD(&khd->rqs[i]);
				388	INIT_LIST_HEAD(&khd->domain_wait[i].task_list);
				389	atomic_set(&khd->wait_index[i], 0);
				390	}
				391
				392	khd->cur_domain = 0;
				393	khd->batching = 0;
				394
				395	hctx->sched_data = khd;
				396
				397	return 0;
				398	}
				399
				400	static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
				401	{
				402	kfree(hctx->sched_data);
				403	}
				404
				405	static int rq_get_domain_token(struct request *rq)
				406	{
				407	return (long)rq->elv.priv[0];
				408	}
				409
				410	static void rq_set_domain_token(struct request *rq, int token)
				411	{
				412	rq->elv.priv[0] = (void *)(long)token;
				413	}
				414
				415	static void rq_clear_domain_token(struct kyber_queue_data *kqd,
				416	struct request *rq)
				417	{
				418	unsigned int sched_domain;
				419	int nr;
				420
				421	nr = rq_get_domain_token(rq);
				422	if (nr != -1) {
				423	sched_domain = rq_sched_domain(rq);
				424	sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				425	rq->mq_ctx->cpu);
				426	}
				427	}
				428
				429	static struct request kyber_get_request(struct request_queue q,
				430	unsigned int op,
				431	struct blk_mq_alloc_data *data)
				432	{
				433	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				434	struct request *rq;
				435
				436	/*
				437	* We use the scheduler tags as per-hardware queue queueing tokens.
				438	* Async requests can be limited at this stage.
				439	*/
				440	if (!op_is_sync(op))
				441	data->shallow_depth = kqd->async_depth;
				442
				443	rq = __blk_mq_alloc_request(data, op);
				444	if (rq)
				445	rq_set_domain_token(rq, -1);
				446	return rq;
				447	}
				448
				449	static void kyber_put_request(struct request *rq)
				450	{
				451	struct request_queue *q = rq->q;
				452	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				453
				454	rq_clear_domain_token(kqd, rq);
				455	blk_mq_finish_request(rq);
				456	}
				457
				458	static void kyber_completed_request(struct request *rq)
				459	{
				460	struct request_queue *q = rq->q;
				461	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				462	unsigned int sched_domain;
				463	u64 now, latency, target;
				464
				465	/*
				466	* Check if this request met our latency goal. If not, quickly gather
				467	* some statistics and start throttling.
				468	*/
				469	sched_domain = rq_sched_domain(rq);
				470	switch (sched_domain) {
				471	case KYBER_READ:
				472	target = kqd->read_lat_nsec;
				473	break;
				474	case KYBER_SYNC_WRITE:
				475	target = kqd->write_lat_nsec;
				476	break;
				477	default:
				478	return;
				479	}
				480
				481	/* If we are already monitoring latencies, don't check again. */
				482	if (blk_stat_is_active(kqd->cb))
				483	return;
				484
				485	now = __blk_stat_time(ktime_to_ns(ktime_get()));
				486	if (now < blk_stat_time(&rq->issue_stat))
				487	return;
				488
				489	latency = now - blk_stat_time(&rq->issue_stat);
				490
				491	if (latency > target)
				492	blk_stat_activate_msecs(kqd->cb, 10);
				493	}
				494
				495	static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
				496	struct blk_mq_hw_ctx *hctx)
				497	{
				498	LIST_HEAD(rq_list);
				499	struct request rq, next;
				500
				501	blk_mq_flush_busy_ctxs(hctx, &rq_list);
				502	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
				503	unsigned int sched_domain;
				504
				505	sched_domain = rq_sched_domain(rq);
				506	list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
				507	}
				508	}
				509
				510	static int kyber_domain_wake(wait_queue_t *wait, unsigned mode, int flags,
				511	void *key)
				512	{
				513	struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);
				514
				515	list_del_init(&wait->task_list);
				516	blk_mq_run_hw_queue(hctx, true);
				517	return 1;
				518	}
				519
				520	static int kyber_get_domain_token(struct kyber_queue_data *kqd,
				521	struct kyber_hctx_data *khd,
				522	struct blk_mq_hw_ctx *hctx)
				523	{
				524	unsigned int sched_domain = khd->cur_domain;
				525	struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
				526	wait_queue_t *wait = &khd->domain_wait[sched_domain];
				527	struct sbq_wait_state *ws;
				528	int nr;
				529
				530	nr = __sbitmap_queue_get(domain_tokens);
				531	if (nr >= 0)
				532	return nr;
				533
				534	/*
				535	* If we failed to get a domain token, make sure the hardware queue is
				536	* run when one becomes available. Note that this is serialized on
				537	* khd->lock, but we still need to be careful about the waker.
				538	*/
				539	if (list_empty_careful(&wait->task_list)) {
				540	init_waitqueue_func_entry(wait, kyber_domain_wake);
				541	wait->private = hctx;
				542	ws = sbq_wait_ptr(domain_tokens,
				543	&khd->wait_index[sched_domain]);
				544	add_wait_queue(&ws->wait, wait);
				545
				546	/*
				547	* Try again in case a token was freed before we got on the wait
				548	* queue.
				549	*/
				550	nr = __sbitmap_queue_get(domain_tokens);
				551	}
				552	return nr;
				553	}
				554
				555	static struct request *
				556	kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
				557	struct kyber_hctx_data *khd,
				558	struct blk_mq_hw_ctx *hctx,
				559	bool *flushed)
				560	{
				561	struct list_head *rqs;
				562	struct request *rq;
				563	int nr;
				564
				565	rqs = &khd->rqs[khd->cur_domain];
				566	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				567
				568	/*
				569	* If there wasn't already a pending request and we haven't flushed the
				570	* software queues yet, flush the software queues and check again.
				571	*/
				572	if (!rq && !*flushed) {
				573	kyber_flush_busy_ctxs(khd, hctx);
				574	*flushed = true;
				575	rq = list_first_entry_or_null(rqs, struct request, queuelist);
				576	}
				577
				578	if (rq) {
				579	nr = kyber_get_domain_token(kqd, khd, hctx);
				580	if (nr >= 0) {
				581	khd->batching++;
				582	rq_set_domain_token(rq, nr);
				583	list_del_init(&rq->queuelist);
				584	return rq;
				585	}
				586	}
				587
				588	/* There were either no pending requests or no tokens. */
				589	return NULL;
				590	}
				591
				592	static struct request kyber_dispatch_request(struct blk_mq_hw_ctx hctx)
				593	{
				594	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
				595	struct kyber_hctx_data *khd = hctx->sched_data;
				596	bool flushed = false;
				597	struct request *rq;
				598	int i;
				599
				600	spin_lock(&khd->lock);
				601
				602	/*
				603	* First, if we are still entitled to batch, try to dispatch a request
				604	* from the batch.
				605	*/
				606	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
				607	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				608	if (rq)
				609	goto out;
				610	}
				611
				612	/*
				613	* Either,
				614	* 1. We were no longer entitled to a batch.
				615	* 2. The domain we were batching didn't have any requests.
				616	* 3. The domain we were batching was out of tokens.
				617	*
				618	* Start another batch. Note that this wraps back around to the original
				619	* domain if no other domains have requests or tokens.
				620	*/
				621	khd->batching = 0;
				622	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				623	if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
				624	khd->cur_domain = 0;
				625	else
				626	khd->cur_domain++;
				627
				628	rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
				629	if (rq)
				630	goto out;
				631	}
				632
				633	rq = NULL;
				634	out:
				635	spin_unlock(&khd->lock);
				636	return rq;
				637	}
				638
				639	static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
				640	{
				641	struct kyber_hctx_data *khd = hctx->sched_data;
				642	int i;
				643
				644	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
				645	if (!list_empty_careful(&khd->rqs[i]))
				646	return true;
				647	}
				648	return false;
				649	}
				650
				651	#define KYBER_LAT_SHOW_STORE(op) \
				652	static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
				653	char *page) \
				654	{ \
				655	struct kyber_queue_data *kqd = e->elevator_data; \
				656	\
				657	return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
				658	} \
				659	\
				660	static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
				661	const char *page, size_t count) \
				662	{ \
				663	struct kyber_queue_data *kqd = e->elevator_data; \
				664	unsigned long long nsec; \
				665	int ret; \
				666	\
				667	ret = kstrtoull(page, 10, &nsec); \
				668	if (ret) \
				669	return ret; \
				670	\
				671	kqd->op##_lat_nsec = nsec; \
				672	\
				673	return count; \
				674	}
				675	KYBER_LAT_SHOW_STORE(read);
				676	KYBER_LAT_SHOW_STORE(write);
				677	#undef KYBER_LAT_SHOW_STORE
				678
				679	#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
				680	static struct elv_fs_entry kyber_sched_attrs[] = {
				681	KYBER_LAT_ATTR(read),
				682	KYBER_LAT_ATTR(write),
				683	__ATTR_NULL
				684	};
				685	#undef KYBER_LAT_ATTR
				686
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	687	#ifdef CONFIG_BLK_DEBUG_FS
				688	#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \
				689	static int kyber_##name##_tokens_show(void data, struct seq_file m) \
				690	{ \
				691	struct request_queue *q = data; \
				692	struct kyber_queue_data *kqd = q->elevator->elevator_data; \
				693	\
				694	sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
				695	return 0; \
				696	} \
				697	\
				698	static void kyber_##name##_rqs_start(struct seq_file m, loff_t *pos) \
				699	__acquires(&khd->lock) \
				700	{ \
				701	struct blk_mq_hw_ctx *hctx = m->private; \
				702	struct kyber_hctx_data *khd = hctx->sched_data; \
				703	\
				704	spin_lock(&khd->lock); \
				705	return seq_list_start(&khd->rqs[domain], *pos); \
				706	} \
				707	\
				708	static void kyber_##name##_rqs_next(struct seq_file m, void *v, \
				709	loff_t *pos) \
				710	{ \
				711	struct blk_mq_hw_ctx *hctx = m->private; \
				712	struct kyber_hctx_data *khd = hctx->sched_data; \
				713	\
				714	return seq_list_next(v, &khd->rqs[domain], pos); \
				715	} \
				716	\
				717	static void kyber_##name##_rqs_stop(struct seq_file m, void v) \
				718	__releases(&khd->lock) \
				719	{ \
				720	struct blk_mq_hw_ctx *hctx = m->private; \
				721	struct kyber_hctx_data *khd = hctx->sched_data; \
				722	\
				723	spin_unlock(&khd->lock); \
				724	} \
				725	\
				726	static const struct seq_operations kyber_##name##_rqs_seq_ops = { \
				727	.start = kyber_##name##_rqs_start, \
				728	.next = kyber_##name##_rqs_next, \
				729	.stop = kyber_##name##_rqs_stop, \
				730	.show = blk_mq_debugfs_rq_show, \
				731	}; \
				732	\
				733	static int kyber_##name##_waiting_show(void data, struct seq_file m) \
				734	{ \
				735	struct blk_mq_hw_ctx *hctx = data; \
				736	struct kyber_hctx_data *khd = hctx->sched_data; \
				737	wait_queue_t *wait = &khd->domain_wait[domain]; \
				738	\
				739	seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \
				740	return 0; \
				741	}
				742	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
				743	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
				744	KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
				745	#undef KYBER_DEBUGFS_DOMAIN_ATTRS
				746
				747	static int kyber_async_depth_show(void data, struct seq_file m)
				748	{
				749	struct request_queue *q = data;
				750	struct kyber_queue_data *kqd = q->elevator->elevator_data;
				751
				752	seq_printf(m, "%u\n", kqd->async_depth);
				753	return 0;
				754	}
				755
				756	static int kyber_cur_domain_show(void data, struct seq_file m)
				757	{
				758	struct blk_mq_hw_ctx *hctx = data;
				759	struct kyber_hctx_data *khd = hctx->sched_data;
				760
				761	switch (khd->cur_domain) {
				762	case KYBER_READ:
				763	seq_puts(m, "READ\n");
				764	break;
				765	case KYBER_SYNC_WRITE:
				766	seq_puts(m, "SYNC_WRITE\n");
				767	break;
				768	case KYBER_OTHER:
				769	seq_puts(m, "OTHER\n");
				770	break;
				771	default:
				772	seq_printf(m, "%u\n", khd->cur_domain);
				773	break;
				774	}
				775	return 0;
				776	}
				777
				778	static int kyber_batching_show(void data, struct seq_file m)
				779	{
				780	struct blk_mq_hw_ctx *hctx = data;
				781	struct kyber_hctx_data *khd = hctx->sched_data;
				782
				783	seq_printf(m, "%u\n", khd->batching);
				784	return 0;
				785	}
				786
				787	#define KYBER_QUEUE_DOMAIN_ATTRS(name) \
				788	{#name "_tokens", 0400, kyber_##name##_tokens_show}
				789	static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
				790	KYBER_QUEUE_DOMAIN_ATTRS(read),
				791	KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
				792	KYBER_QUEUE_DOMAIN_ATTRS(other),
				793	{"async_depth", 0400, kyber_async_depth_show},
				794	{},
				795	};
				796	#undef KYBER_QUEUE_DOMAIN_ATTRS
				797
				798	#define KYBER_HCTX_DOMAIN_ATTRS(name) \
				799	{#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \
				800	{#name "_waiting", 0400, kyber_##name##_waiting_show}
				801	static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
				802	KYBER_HCTX_DOMAIN_ATTRS(read),
				803	KYBER_HCTX_DOMAIN_ATTRS(sync_write),
				804	KYBER_HCTX_DOMAIN_ATTRS(other),
				805	{"cur_domain", 0400, kyber_cur_domain_show},
				806	{"batching", 0400, kyber_batching_show},
				807	{},
				808	};
				809	#undef KYBER_HCTX_DOMAIN_ATTRS
				810	#endif
				811
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	812	static struct elevator_type kyber_sched = {
				813	.ops.mq = {
				814	.init_sched = kyber_init_sched,
				815	.exit_sched = kyber_exit_sched,
				816	.init_hctx = kyber_init_hctx,
				817	.exit_hctx = kyber_exit_hctx,
				818	.get_request = kyber_get_request,
				819	.put_request = kyber_put_request,
				820	.completed_request = kyber_completed_request,
				821	.dispatch_request = kyber_dispatch_request,
				822	.has_work = kyber_has_work,
				823	},
				824	.uses_mq = true,
Omar Sandoval	16b738f	2017-05-04 00:31:33 -0700	[diff] [blame]	825	#ifdef CONFIG_BLK_DEBUG_FS
				826	.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
				827	.hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
				828	#endif
Omar Sandoval	00e0439	2017-04-14 01:00:02 -0700	[diff] [blame]	829	.elevator_attrs = kyber_sched_attrs,
				830	.elevator_name = "kyber",
				831	.elevator_owner = THIS_MODULE,
				832	};
				833
				834	static int __init kyber_init(void)
				835	{
				836	return elv_register(&kyber_sched);
				837	}
				838
				839	static void __exit kyber_exit(void)
				840	{
				841	elv_unregister(&kyber_sched);
				842	}
				843
				844	module_init(kyber_init);
				845	module_exit(kyber_exit);
				846
				847	MODULE_AUTHOR("Omar Sandoval");
				848	MODULE_LICENSE("GPL");
				849	MODULE_DESCRIPTION("Kyber I/O scheduler");