blob: 8e19c9422eac3d7b84dca05aef213973e3dc846b [file] [log] [blame]
Tatyana Brokhman16349062012-09-20 10:46:10 +03001/*
2 * ROW (Read Over Write) I/O scheduler.
3 *
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +02004 * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
Tatyana Brokhman16349062012-09-20 10:46:10 +03005 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 and
8 * only version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 */
15
16/* See Documentation/block/row-iosched.txt */
17
18#include <linux/kernel.h>
19#include <linux/fs.h>
20#include <linux/blkdev.h>
21#include <linux/elevator.h>
22#include <linux/bio.h>
23#include <linux/module.h>
24#include <linux/slab.h>
25#include <linux/init.h>
26#include <linux/compiler.h>
27#include <linux/blktrace_api.h>
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +020028#include <linux/hrtimer.h>
Tatyana Brokhman16349062012-09-20 10:46:10 +030029
30/*
31 * enum row_queue_prio - Priorities of the ROW queues
32 *
33 * This enum defines the priorities (and the number of queues)
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +020034 * the requests will be distributed to. The higher priority -
35 * the bigger is the "bus time" (or the dispatch quantum) given
36 * to that queue.
 * ROWQ_PRIO_HIGH_READ - is the highest priority queue.
38 *
39 */
enum row_queue_prio {
	ROWQ_PRIO_HIGH_READ = 0,	/* highest priority queue */
	ROWQ_PRIO_HIGH_SWRITE,
	ROWQ_PRIO_REG_READ,	/* first queue of the REGULAR priority class */
	ROWQ_PRIO_REG_SWRITE,
	ROWQ_PRIO_REG_WRITE,
	ROWQ_PRIO_LOW_READ,	/* first queue of the LOW priority class */
	ROWQ_PRIO_LOW_SWRITE,
	ROWQ_MAX_PRIO,	/* queue count; also used as a "no queue" sentinel */
};
50
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +020051/*
52 * The following indexes define the distribution of ROW queues according to
53 * priorities. Each index defines the first queue in that priority group.
54 */
55#define ROWQ_HIGH_PRIO_IDX ROWQ_PRIO_HIGH_READ
56#define ROWQ_REG_PRIO_IDX ROWQ_PRIO_REG_READ
57#define ROWQ_LOW_PRIO_IDX ROWQ_PRIO_LOW_READ
58
Tatyana Brokhman9375bcc2013-01-12 16:23:18 +020059/**
60 * struct row_queue_params - ROW queue parameters
61 * @idling_enabled: Flag indicating whether idling is enable on
62 * the queue
63 * @quantum: Number of requests to be dispatched from this queue
64 * in a dispatch cycle
65 * @is_urgent: Flags indicating whether the queue can notify on
66 * urgent requests
67 *
68 */
struct row_queue_params {
	bool idling_enabled;	/* idle after the last request on this queue? */
	int quantum;		/* dispatches per cycle, relative within class */
	bool is_urgent;		/* may this queue signal urgent requests? */
};
74
Tatyana Brokhman9375bcc2013-01-12 16:23:18 +020075/*
76 * This array holds the default values of the different configurables
77 * for each ROW queue. Each row of the array holds the following values:
78 * {idling_enabled, quantum, is_urgent}
79 * Each row corresponds to a queue with the same index (according to
80 * enum row_queue_prio)
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +020081 * Note: The quantums are valid inside their priority type. For example:
82 * For every 10 high priority read requests, 1 high priority sync
83 * write will be dispatched.
84 * For every 100 regular read requests 1 regular write request will
85 * be dispatched.
Tatyana Brokhman9375bcc2013-01-12 16:23:18 +020086 */
/* Indexed by enum row_queue_prio; see block comment above for semantics */
static const struct row_queue_params row_queues_def[] = {
/* idling_enabled, quantum, is_urgent */
	{true, 10, true},	/* ROWQ_PRIO_HIGH_READ */
	{false, 1, false},	/* ROWQ_PRIO_HIGH_SWRITE */
	{true, 100, true},	/* ROWQ_PRIO_REG_READ */
	{false, 1, false},	/* ROWQ_PRIO_REG_SWRITE */
	{false, 1, false},	/* ROWQ_PRIO_REG_WRITE */
	{false, 1, false},	/* ROWQ_PRIO_LOW_READ */
	{false, 1, false}	/* ROWQ_PRIO_LOW_SWRITE */
};
97
Tatyana Brokhmanbfb04f62012-12-06 13:17:19 +020098/* Default values for idling on read queues (in msec) */
99#define ROW_IDLE_TIME_MSEC 5
Lee Susmand2439092013-06-23 16:27:40 +0300100#define ROW_READ_FREQ_MSEC 5
Tatyana Brokhman16349062012-09-20 10:46:10 +0300101
102/**
103 * struct rowq_idling_data - parameters for idling on the queue
Tatyana Brokhmanbfb04f62012-12-06 13:17:19 +0200104 * @last_insert_time: time the last request was inserted
105 * to the queue
 * @begin_idling:	flag indicating whether we should idle
107 *
108 */
struct rowq_idling_data {
	ktime_t last_insert_time;	/* when the last request was queued */
	bool begin_idling;	/* idle once this queue runs empty? */
};
113
114/**
115 * struct row_queue - requests grouping structure
116 * @rdata: parent row_data structure
117 * @fifo: fifo of requests
118 * @prio: queue priority (enum row_queue_prio)
119 * @nr_dispatched: number of requests already dispatched in
120 * the current dispatch cycle
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +0200121 * @nr_req: number of requests in queue
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200122 * @dispatch quantum: number of requests this queue may
123 * dispatch in a dispatch cycle
Tatyana Brokhman16349062012-09-20 10:46:10 +0300124 * @idle_data: data for idling on queues
125 *
126 */
struct row_queue {
	struct row_data *rdata;		/* parent scheduler instance */
	struct list_head fifo;		/* pending requests, FIFO order */
	enum row_queue_prio prio;	/* this queue's priority slot */

	unsigned int nr_dispatched;	/* dispatched in current cycle */

	unsigned int nr_req;		/* requests currently on the queue */
	int disp_quantum;		/* max dispatches per cycle */

	/* used only for READ queues */
	struct rowq_idling_data idle_data;
};
140
141/**
142 * struct idling_data - data for idling on empty rqueue
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200143 * @idle_time_ms: idling duration (msec)
144 * @freq_ms: min time between two requests that
 *			trigger idling (msec)
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200146 * @hr_timer: idling timer
147 * @idle_work: the work to be scheduled when idling timer expires
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200148 * @idling_queue_idx: index of the queues we're idling on
Tatyana Brokhman16349062012-09-20 10:46:10 +0300149 *
150 */
struct idling_data {
	s64 idle_time_ms;	/* how long to idle (msec) */
	s64 freq_ms;		/* max insert gap that still arms idling (msec) */

	struct hrtimer hr_timer;	/* fires when the idle window expires */
	struct work_struct idle_work;	/* defers blk_run_queue to process context */
	/* queue currently idled on; ROWQ_MAX_PRIO means "none" */
	enum row_queue_prio idling_queue_idx;
};
159
160/**
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200161 * struct starvation_data - data for starvation management
162 * @starvation_limit: number of times this priority class
163 * can tolerate being starved
164 * @starvation_counter: number of requests from higher
165 * priority classes that were dispatched while this
166 * priority request were pending
167 *
168 */
struct starvation_data {
	int starvation_limit;	/* how many higher-class dispatches we tolerate */
	int starvation_counter;	/* higher-class dispatches while we had work */
};
173
174/**
 * struct row_data - Per block device rqueue structure
176 * @dispatch_queue: dispatch rqueue
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200177 * @row_queues: array of priority request queues
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200178 * @rd_idle_data: data for idling after READ request
Tatyana Brokhman16349062012-09-20 10:46:10 +0300179 * @nr_reqs: nr_reqs[0] holds the number of all READ requests in
180 * scheduler, nr_reqs[1] holds the number of all WRITE
181 * requests in scheduler
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200182 * @urgent_in_flight: flag indicating that there is an urgent
183 * request that was dispatched to driver and is yet to
184 * complete.
185 * @pending_urgent_rq: pointer to the pending urgent request
186 * @last_served_ioprio_class: I/O priority class that was last dispatched from
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200187 * @reg_prio_starvation: starvation data for REGULAR priority queues
188 * @low_prio_starvation: starvation data for LOW priority queues
 * @cycle_flags:	used for marking unserved queues
190 *
191 */
struct row_data {
	struct request_queue *dispatch_queue;	/* the device's request queue */

	struct row_queue row_queues[ROWQ_MAX_PRIO];

	struct idling_data rd_idle_data;	/* READ idling state */
	unsigned int nr_reqs[2];	/* indexed by READ / WRITE */
	bool urgent_in_flight;	/* urgent rq dispatched, not yet completed */
	struct request *pending_urgent_rq;	/* urgent rq awaiting dispatch */
	int last_served_ioprio_class;	/* IOPRIO_CLASS_* last dispatched from */

#define ROW_REG_STARVATION_TOLLERANCE 5000
	struct starvation_data reg_prio_starvation;
#define ROW_LOW_STARVATION_TOLLERANCE 10000
	struct starvation_data low_prio_starvation;

	/* bitmap of queues whose quantum was cut short in the last cycle */
	unsigned int cycle_flags;
};
210
/* Per-request back-pointer to its row_queue (stored in elevator private data) */
#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0]))

/* Scheduler trace messages, visible through blktrace */
#define row_log(q, fmt, args...)   \
	blk_add_trace_msg(q, "%s():" fmt , __func__, ##args)
#define row_log_rowq(rdata, rowq_id, fmt, args...)		\
	blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \
		rowq_id, ##args)
218
219static inline void row_mark_rowq_unserved(struct row_data *rd,
220 enum row_queue_prio qnum)
221{
222 rd->cycle_flags |= (1 << qnum);
223}
224
225static inline void row_clear_rowq_unserved(struct row_data *rd,
226 enum row_queue_prio qnum)
227{
228 rd->cycle_flags &= ~(1 << qnum);
229}
230
231static inline int row_rowq_unserved(struct row_data *rd,
232 enum row_queue_prio qnum)
233{
234 return rd->cycle_flags & (1 << qnum);
235}
236
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +0200237static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd)
238{
239 int i;
240
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200241 row_log(rd->dispatch_queue, " Queues status:");
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +0200242 for (i = 0; i < ROWQ_MAX_PRIO; i++)
243 row_log(rd->dispatch_queue,
244 "queue%d: dispatched= %d, nr_req=%d", i,
245 rd->row_queues[i].nr_dispatched,
246 rd->row_queues[i].nr_req);
247}
248
Tatyana Brokhman16349062012-09-20 10:46:10 +0300249/******************** Static helper functions ***********************/
Tatyana Brokhman16349062012-09-20 10:46:10 +0300250static void kick_queue(struct work_struct *work)
251{
Tatyana Brokhman16349062012-09-20 10:46:10 +0300252 struct idling_data *read_data =
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200253 container_of(work, struct idling_data, idle_work);
254 struct row_data *rd =
255 container_of(read_data, struct row_data, rd_idle_data);
256
257 blk_run_queue(rd->dispatch_queue);
258}
259
260
/*
 * row_idle_hrtimer_fn() - Idle-window expiry handler.
 *
 * Runs in hrtimer (atomic) context: clears the idling state and, if
 * there is still work in the scheduler, defers the actual queue kick
 * to kblockd via the idle_work item (blk_run_queue cannot be called
 * from here directly).
 */
static enum hrtimer_restart row_idle_hrtimer_fn(struct hrtimer *hr_timer)
{
	struct idling_data *read_data =
		container_of(hr_timer, struct idling_data, hr_timer);
	struct row_data *rd =
		container_of(read_data, struct row_data, rd_idle_data);

	row_log_rowq(rd, rd->rd_idle_data.idling_queue_idx,
			 "Performing delayed work");
	/* Mark idling process as done */
	rd->row_queues[rd->rd_idle_data.idling_queue_idx].
			idle_data.begin_idling = false;
	rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;

	if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE])
		row_log(rd->dispatch_queue, "No requests in scheduler");
	else
		kblockd_schedule_work(rd->dispatch_queue,
			&read_data->idle_work);
	return HRTIMER_NORESTART;
}
282
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200283/*
284 * row_regular_req_pending() - Check if there are REGULAR priority requests
285 * Pending in scheduler
286 * @rd: pointer to struct row_data
287 *
288 * Returns True if there are REGULAR priority requests in scheduler queues.
289 * False, otherwise.
290 */
291static inline bool row_regular_req_pending(struct row_data *rd)
292{
293 int i;
294
295 for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++)
296 if (!list_empty(&rd->row_queues[i].fifo))
297 return true;
298 return false;
299}
300
301/*
302 * row_low_req_pending() - Check if there are LOW priority requests
303 * Pending in scheduler
304 * @rd: pointer to struct row_data
305 *
306 * Returns True if there are LOW priority requests in scheduler queues.
307 * False, otherwise.
308 */
309static inline bool row_low_req_pending(struct row_data *rd)
310{
311 int i;
312
313 for (i = ROWQ_LOW_PRIO_IDX; i < ROWQ_MAX_PRIO; i++)
314 if (!list_empty(&rd->row_queues[i].fifo))
315 return true;
316 return false;
317}
318
Tatyana Brokhman16349062012-09-20 10:46:10 +0300319/******************* Elevator callback functions *********************/
320
321/*
322 * row_add_request() - Add request to the scheduler
323 * @q: requests queue
324 * @rq: request to add
325 *
326 */
static void row_add_request(struct request_queue *q,
				struct request *rq)
{
	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
	struct row_queue *rqueue = RQ_ROWQ(rq);
	s64 diff_ms;
	bool queue_was_empty = list_empty(&rqueue->fifo);
	unsigned long bv_page_flags = 0;

	/* Page flags are consulted below: readahead pages hint at more I/O */
	if (rq->bio && rq->bio->bi_io_vec && rq->bio->bi_io_vec->bv_page)
		bv_page_flags = rq->bio->bi_io_vec->bv_page->flags;

	list_add_tail(&rq->queuelist, &rqueue->fifo);
	rd->nr_reqs[rq_data_dir(rq)]++;
	rqueue->nr_req++;
	rq_set_fifo_time(rq, jiffies); /* for statistics*/

	/*
	 * REQ_URGENT is owned by this scheduler: it is set only when a
	 * request is promoted below. An incoming request carrying it
	 * indicates a bug elsewhere — warn and strip the flag.
	 */
	if (rq->cmd_flags & REQ_URGENT) {
		WARN_ON(1);
		blk_dump_rq_flags(rq, "");
		rq->cmd_flags &= ~REQ_URGENT;
	}

	if (row_queues_def[rqueue->prio].idling_enabled) {
		/* New work on the idled queue: cancel the idle timer */
		if (rd->rd_idle_data.idling_queue_idx == rqueue->prio &&
		    hrtimer_active(&rd->rd_idle_data.hr_timer)) {
			if (hrtimer_try_to_cancel(
				&rd->rd_idle_data.hr_timer) >= 0) {
				row_log_rowq(rd, rqueue->prio,
					"Canceled delayed work on %d",
					rd->rd_idle_data.idling_queue_idx);
				rd->rd_idle_data.idling_queue_idx =
					ROWQ_MAX_PRIO;
			}
		}
		diff_ms = ktime_to_ms(ktime_sub(ktime_get(),
				rqueue->idle_data.last_insert_time));
		if (unlikely(diff_ms < 0)) {
			pr_err("%s(): time delta error: diff_ms < 0",
				__func__);
			rqueue->idle_data.begin_idling = false;
			return;
		}

		/*
		 * Frequent inserts (or a readahead page) suggest more
		 * requests will follow soon, so arm idling on this queue.
		 */
		if ((bv_page_flags & (1L << PG_readahead)) ||
		    (diff_ms < rd->rd_idle_data.freq_ms)) {
			rqueue->idle_data.begin_idling = true;
			row_log_rowq(rd, rqueue->prio, "Enable idling");
		} else {
			rqueue->idle_data.begin_idling = false;
			row_log_rowq(rd, rqueue->prio, "Disable idling (%ldms)",
				(long)diff_ms);
		}

		rqueue->idle_data.last_insert_time = ktime_get();
	}
	/* At most one urgent request may be pending/in flight at a time */
	if (row_queues_def[rqueue->prio].is_urgent &&
	    !rd->pending_urgent_rq && !rd->urgent_in_flight) {
		/* Handle High Priority queues */
		if (rqueue->prio < ROWQ_REG_PRIO_IDX &&
		    rd->last_served_ioprio_class != IOPRIO_CLASS_RT &&
		    queue_was_empty) {
			row_log_rowq(rd, rqueue->prio,
				"added (high prio) urgent request");
			rq->cmd_flags |= REQ_URGENT;
			rd->pending_urgent_rq = rq;
		} else if (row_rowq_unserved(rd, rqueue->prio)) {
			/* Handle regular priority queues */
			row_log_rowq(rd, rqueue->prio,
				"added urgent request (total on queue=%d)",
				rqueue->nr_req);
			rq->cmd_flags |= REQ_URGENT;
			WARN_ON(rqueue->nr_req > 1);
			rd->pending_urgent_rq = rq;
		}
	} else
		row_log_rowq(rd, rqueue->prio,
			"added request (total on queue=%d)", rqueue->nr_req);
}
406
Tatyana Brokhmanb7bf9ac2012-10-30 08:33:06 +0200407/**
408 * row_reinsert_req() - Reinsert request back to the scheduler
409 * @q: requests queue
410 * @rq: request to add
411 *
412 * Reinsert the given request back to the queue it was
413 * dispatched from as if it was never dispatched.
414 *
415 * Returns 0 on success, error code otherwise
416 */
static int row_reinsert_req(struct request_queue *q,
			struct request *rq)
{
	struct row_data *rd = q->elevator->elevator_data;
	struct row_queue *rqueue = RQ_ROWQ(rq);

	/* Sanity: the request must still point at a valid row_queue */
	if (!rqueue || rqueue->prio >= ROWQ_MAX_PRIO)
		return -EIO;

	/* Head insertion so the request keeps its original dispatch order */
	list_add(&rq->queuelist, &rqueue->fifo);
	rd->nr_reqs[rq_data_dir(rq)]++;
	rqueue->nr_req++;

	row_log_rowq(rd, rqueue->prio,
		"%s request reinserted (total on queue=%d)",
		(rq_data_dir(rq) == READ ? "READ" : "write"), rqueue->nr_req);

	if (rq->cmd_flags & REQ_URGENT) {
		/*
		 * It's not compliant with the design to re-insert
		 * urgent requests. We want to be able to track this
		 * down.
		 */
		WARN_ON(1);
		if (!rd->urgent_in_flight) {
			pr_err("%s(): no urgent in flight", __func__);
		} else {
			/* The urgent rq came back; it is pending again */
			rd->urgent_in_flight = false;
			pr_err("%s(): reinserting URGENT %s req",
				__func__,
				(rq_data_dir(rq) == READ ? "READ" : "WRITE"));
			if (rd->pending_urgent_rq) {
				/* Demote any other pending urgent request */
				pr_err("%s(): urgent rq is pending",
					__func__);
				rd->pending_urgent_rq->cmd_flags &= ~REQ_URGENT;
			}
			rd->pending_urgent_rq = rq;
		}
	}
	return 0;
}
458
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200459static void row_completed_req(struct request_queue *q, struct request *rq)
460{
461 struct row_data *rd = q->elevator->elevator_data;
462
463 if (rq->cmd_flags & REQ_URGENT) {
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200464 if (!rd->urgent_in_flight) {
465 WARN_ON(1);
466 pr_err("%s(): URGENT req but urgent_in_flight = F",
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200467 __func__);
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200468 }
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200469 rd->urgent_in_flight = false;
470 rq->cmd_flags &= ~REQ_URGENT;
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200471 }
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200472 row_log(q, "completed %s %s req.",
473 (rq->cmd_flags & REQ_URGENT ? "URGENT" : "regular"),
474 (rq_data_dir(rq) == READ ? "READ" : "WRITE"));
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200475}
476
Tatyana Brokhmanb7bf9ac2012-10-30 08:33:06 +0200477/**
Tatyana Brokhman0ef81432012-12-20 19:23:58 +0200478 * row_urgent_pending() - Return TRUE if there is an urgent
479 * request on scheduler
480 * @q: requests queue
481 */
482static bool row_urgent_pending(struct request_queue *q)
483{
484 struct row_data *rd = q->elevator->elevator_data;
Tatyana Brokhman0ef81432012-12-20 19:23:58 +0200485
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200486 if (rd->urgent_in_flight) {
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200487 row_log(rd->dispatch_queue, "%d urgent requests in flight",
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200488 rd->urgent_in_flight);
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200489 return false;
490 }
491
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200492 if (rd->pending_urgent_rq) {
493 row_log(rd->dispatch_queue, "Urgent request pending");
494 return true;
495 }
Shashank Babu Chinta Venkata3df69bf2013-02-26 17:33:55 -0800496
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200497 row_log(rd->dispatch_queue, "no urgent request pending/in flight");
Tatyana Brokhman0ef81432012-12-20 19:23:58 +0200498 return false;
499}
500
501/**
Tatyana Brokhman16349062012-09-20 10:46:10 +0300502 * row_remove_request() - Remove given request from scheduler
 * @rd:	pointer to struct row_data
504 * @rq: request to remove
505 *
506 */
static void row_remove_request(struct row_data *rd,
			   struct request *rq)
{
	struct row_queue *rqueue = RQ_ROWQ(rq);

	list_del_init(&(rq)->queuelist);
	if (rd->pending_urgent_rq == rq)
		rd->pending_urgent_rq = NULL;
	else
		/* Only the tracked pending request may carry REQ_URGENT */
		BUG_ON(rq->cmd_flags & REQ_URGENT);
	rqueue->nr_req--;
	rd->nr_reqs[rq_data_dir(rq)]--;
}
520
521/*
522 * row_dispatch_insert() - move request to dispatch queue
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200523 * @rd: pointer to struct row_data
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200524 * @rq: the request to dispatch
Tatyana Brokhman16349062012-09-20 10:46:10 +0300525 *
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200526 * This function moves the given request to the dispatch queue
Tatyana Brokhman16349062012-09-20 10:46:10 +0300527 *
528 */
static void row_dispatch_insert(struct row_data *rd, struct request *rq)
{
	struct row_queue *rqueue = RQ_ROWQ(rq);

	row_remove_request(rd, rq);
	elv_dispatch_sort(rd->dispatch_queue, rq);
	if (rq->cmd_flags & REQ_URGENT) {
		/* Only one urgent request may be in flight at a time */
		WARN_ON(rd->urgent_in_flight);
		rd->urgent_in_flight = true;
	}
	rqueue->nr_dispatched++;
	row_clear_rowq_unserved(rd, rqueue->prio);
	row_log_rowq(rd, rqueue->prio,
		" Dispatched request %p nr_disp = %d", rq,
		rqueue->nr_dispatched);
	/*
	 * Starvation accounting: bump the counters of every lower
	 * priority class that has pending work; reset the counter of
	 * the class we just served.
	 */
	if (rqueue->prio < ROWQ_REG_PRIO_IDX) {
		rd->last_served_ioprio_class = IOPRIO_CLASS_RT;
		if (row_regular_req_pending(rd))
			rd->reg_prio_starvation.starvation_counter++;
		if (row_low_req_pending(rd))
			rd->low_prio_starvation.starvation_counter++;
	} else if (rqueue->prio < ROWQ_LOW_PRIO_IDX) {
		rd->last_served_ioprio_class = IOPRIO_CLASS_BE;
		rd->reg_prio_starvation.starvation_counter = 0;
		if (row_low_req_pending(rd))
			rd->low_prio_starvation.starvation_counter++;
	} else {
		rd->last_served_ioprio_class = IOPRIO_CLASS_IDLE;
		rd->low_prio_starvation.starvation_counter = 0;
	}
}
560
561/*
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200562 * row_get_ioprio_class_to_serve() - Return the next I/O priority
563 * class to dispatch requests from
Tatyana Brokhman16349062012-09-20 10:46:10 +0300564 * @rd: pointer to struct row_data
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200565 * @force: flag indicating if forced dispatch
Tatyana Brokhman16349062012-09-20 10:46:10 +0300566 *
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200567 * This function returns the next I/O priority class to serve
568 * {IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE}.
569 * If there are no more requests in scheduler or if we're idling on some queue
570 * IOPRIO_CLASS_NONE will be returned.
571 * If idling is scheduled on a lower priority queue than the one that needs
572 * to be served, it will be canceled.
Tatyana Brokhman16349062012-09-20 10:46:10 +0300573 *
574 */
static int row_get_ioprio_class_to_serve(struct row_data *rd, int force)
{
	int i;
	int ret = IOPRIO_CLASS_NONE;

	if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE]) {
		row_log(rd->dispatch_queue, "No more requests in scheduler");
		goto check_idling;
	}

	/* First, go over the high priority queues */
	for (i = 0; i < ROWQ_REG_PRIO_IDX; i++) {
		if (!list_empty(&rd->row_queues[i].fifo)) {
			/* RT work pending: abort any scheduled idling */
			if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
				if (hrtimer_try_to_cancel(
					&rd->rd_idle_data.hr_timer) >= 0) {
					row_log(rd->dispatch_queue,
					"Canceling delayed work on %d. RT pending",
					     rd->rd_idle_data.idling_queue_idx);
					rd->rd_idle_data.idling_queue_idx =
						ROWQ_MAX_PRIO;
				}
			}

			/* Yield to a starved lower class if over its limit */
			if (row_regular_req_pending(rd) &&
			    (rd->reg_prio_starvation.starvation_counter >=
			     rd->reg_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_BE;
			else if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_RT;

			goto done;
		}
	}

	/*
	 * At the moment idling is implemented only for READ queues.
	 * If enabled on WRITE, this needs updating
	 */
	if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
		row_log(rd->dispatch_queue, "Delayed work pending. Exiting");
		goto done;
	}
check_idling:
	/* Check for (high priority) idling and enable if needed */
	for (i = 0; i < ROWQ_REG_PRIO_IDX && !force; i++) {
		if (rd->row_queues[i].idle_data.begin_idling &&
		    row_queues_def[i].idling_enabled)
			goto initiate_idling;
	}

	/* Regular priority queues */
	for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++) {
		if (list_empty(&rd->row_queues[i].fifo)) {
			/* We can idle only if this is not a forced dispatch */
			if (rd->row_queues[i].idle_data.begin_idling &&
			    !force && row_queues_def[i].idling_enabled)
				goto initiate_idling;
		} else {
			/* BE work exists; still honor a starved LOW class */
			if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_BE;
			goto done;
		}
	}

	/* Only LOW-class requests remain */
	if (rd->nr_reqs[READ] || rd->nr_reqs[WRITE])
		ret = IOPRIO_CLASS_IDLE;
	goto done;

initiate_idling:
	hrtimer_start(&rd->rd_idle_data.hr_timer,
		ktime_set(0, rd->rd_idle_data.idle_time_ms * NSEC_PER_MSEC),
		HRTIMER_MODE_REL);

	rd->rd_idle_data.idling_queue_idx = i;
	row_log_rowq(rd, i, "Scheduled delayed work on %d. exiting", i);

done:
	return ret;
}
663
664static void row_restart_cycle(struct row_data *rd,
665 int start_idx, int end_idx)
666{
667 int i;
668
669 row_dump_queues_stat(rd);
670 for (i = start_idx; i < end_idx; i++) {
671 if (rd->row_queues[i].nr_dispatched <
672 rd->row_queues[i].disp_quantum)
673 row_mark_rowq_unserved(rd, i);
674 rd->row_queues[i].nr_dispatched = 0;
675 }
676 row_log(rd->dispatch_queue, "Restarting cycle for class @ %d-%d",
677 start_idx, end_idx);
678}
679
680/*
681 * row_get_next_queue() - selects the next queue to dispatch from
682 * @q: requests queue
683 * @rd: pointer to struct row_data
684 * @start_idx/end_idx: indexes in the row_queues array to select a queue
685 * from.
686 *
687 * Return index of the queues to dispatch from. Error code if fails.
688 *
689 */
690static int row_get_next_queue(struct request_queue *q, struct row_data *rd,
691 int start_idx, int end_idx)
692{
693 int i = start_idx;
694 bool restart = true;
695 int ret = -EIO;
696
697 do {
698 if (list_empty(&rd->row_queues[i].fifo) ||
699 rd->row_queues[i].nr_dispatched >=
700 rd->row_queues[i].disp_quantum) {
701 i++;
702 if (i == end_idx && restart) {
703 /* Restart cycle for this priority class */
704 row_restart_cycle(rd, start_idx, end_idx);
705 i = start_idx;
706 restart = false;
707 }
708 } else {
709 ret = i;
710 break;
711 }
712 } while (i < end_idx);
713
714 return ret;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300715}
716
/*
 * row_dispatch_requests() - selects the next request to dispatch
 * @q: requests queue
 * @force: flag indicating if forced dispatch
 *
 * Return 0 if no requests were moved to the dispatch queue.
 *	  1 otherwise
 *
 */
static int row_dispatch_requests(struct request_queue *q, int force)
{
	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
	int ret = 0, currq, ioprio_class_to_serve, start_idx, end_idx;

	/*
	 * Forced dispatch must not wait for the read-idle timer: try to
	 * cancel it so a request can be dispatched right away.
	 */
	if (force && hrtimer_active(&rd->rd_idle_data.hr_timer)) {
		if (hrtimer_try_to_cancel(&rd->rd_idle_data.hr_timer) >= 0) {
			row_log(rd->dispatch_queue,
				"Canceled delayed work on %d - forced dispatch",
				rd->rd_idle_data.idling_queue_idx);
			rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
		}
	}

	/* A pending urgent request bypasses the priority-class scan */
	if (rd->pending_urgent_rq) {
		row_log(rd->dispatch_queue, "dispatching urgent request");
		row_dispatch_insert(rd, rd->pending_urgent_rq);
		ret = 1;
		goto done;
	}

	ioprio_class_to_serve = row_get_ioprio_class_to_serve(rd, force);
	row_log(rd->dispatch_queue, "Dispatching from %d priority class",
		ioprio_class_to_serve);

	/* Map the chosen I/O priority class onto its row_queues index range */
	switch (ioprio_class_to_serve) {
	case IOPRIO_CLASS_NONE:
		/* Nothing eligible to serve this round */
		rd->last_served_ioprio_class = IOPRIO_CLASS_NONE;
		goto done;
	case IOPRIO_CLASS_RT:
		start_idx = ROWQ_HIGH_PRIO_IDX;
		end_idx = ROWQ_REG_PRIO_IDX;
		break;
	case IOPRIO_CLASS_BE:
		start_idx = ROWQ_REG_PRIO_IDX;
		end_idx = ROWQ_LOW_PRIO_IDX;
		break;
	case IOPRIO_CLASS_IDLE:
		start_idx = ROWQ_LOW_PRIO_IDX;
		end_idx = ROWQ_MAX_PRIO;
		break;
	default:
		pr_err("%s(): Invalid I/O priority class", __func__);
		goto done;
	}

	currq = row_get_next_queue(q, rd, start_idx, end_idx);

	/* Dispatch (a negative currq means no eligible queue was found) */
	if (currq >= 0) {
		row_dispatch_insert(rd,
			rq_entry_fifo(rd->row_queues[currq].fifo.next));
		ret = 1;
	}
done:
	return ret;
}
783
784/*
785 * row_init_queue() - Init scheduler data structures
786 * @q: requests queue
787 *
788 * Return pointer to struct row_data to be saved in elevator for
789 * this dispatch queue
790 *
791 */
792static void *row_init_queue(struct request_queue *q)
793{
794
795 struct row_data *rdata;
796 int i;
797
798 rdata = kmalloc_node(sizeof(*rdata),
799 GFP_KERNEL | __GFP_ZERO, q->node);
800 if (!rdata)
801 return NULL;
802
Tatyana Brokhman522778f2013-01-24 16:17:27 +0200803 memset(rdata, 0, sizeof(*rdata));
Tatyana Brokhman16349062012-09-20 10:46:10 +0300804 for (i = 0; i < ROWQ_MAX_PRIO; i++) {
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200805 INIT_LIST_HEAD(&rdata->row_queues[i].fifo);
Tatyana Brokhman9375bcc2013-01-12 16:23:18 +0200806 rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum;
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200807 rdata->row_queues[i].rdata = rdata;
808 rdata->row_queues[i].prio = i;
809 rdata->row_queues[i].idle_data.begin_idling = false;
810 rdata->row_queues[i].idle_data.last_insert_time =
Tatyana Brokhmanbfb04f62012-12-06 13:17:19 +0200811 ktime_set(0, 0);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300812 }
813
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200814 rdata->reg_prio_starvation.starvation_limit =
815 ROW_REG_STARVATION_TOLLERANCE;
816 rdata->low_prio_starvation.starvation_limit =
817 ROW_LOW_STARVATION_TOLLERANCE;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300818 /*
819 * Currently idling is enabled only for READ queues. If we want to
820 * enable it for write queues also, note that idling frequency will
821 * be the same in both cases
822 */
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200823 rdata->rd_idle_data.idle_time_ms = ROW_IDLE_TIME_MSEC;
824 rdata->rd_idle_data.freq_ms = ROW_READ_FREQ_MSEC;
825 hrtimer_init(&rdata->rd_idle_data.hr_timer,
826 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
827 rdata->rd_idle_data.hr_timer.function = &row_idle_hrtimer_fn;
828
829 INIT_WORK(&rdata->rd_idle_data.idle_work, kick_queue);
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200830 rdata->last_served_ioprio_class = IOPRIO_CLASS_NONE;
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200831 rdata->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300832 rdata->dispatch_queue = q;
833
Tatyana Brokhman16349062012-09-20 10:46:10 +0300834 return rdata;
835}
836
837/*
838 * row_exit_queue() - called on unloading the RAW scheduler
839 * @e: poiner to struct elevator_queue
840 *
841 */
842static void row_exit_queue(struct elevator_queue *e)
843{
844 struct row_data *rd = (struct row_data *)e->elevator_data;
845 int i;
846
847 for (i = 0; i < ROWQ_MAX_PRIO; i++)
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200848 BUG_ON(!list_empty(&rd->row_queues[i].fifo));
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200849 if (hrtimer_cancel(&rd->rd_idle_data.hr_timer))
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200850 pr_err("%s(): idle timer was active!", __func__);
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200851 rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300852 kfree(rd);
853}
854
855/*
856 * row_merged_requests() - Called when 2 requests are merged
857 * @q: requests queue
858 * @rq: request the two requests were merged into
859 * @next: request that was merged
860 */
861static void row_merged_requests(struct request_queue *q, struct request *rq,
862 struct request *next)
863{
864 struct row_queue *rqueue = RQ_ROWQ(next);
865
866 list_del_init(&next->queuelist);
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +0200867 rqueue->nr_req--;
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200868 if (rqueue->rdata->pending_urgent_rq == next) {
869 pr_err("\n\nROW_WARNING: merging pending urgent!");
870 rqueue->rdata->pending_urgent_rq = rq;
871 rq->cmd_flags |= REQ_URGENT;
872 WARN_ON(!(next->cmd_flags & REQ_URGENT));
873 next->cmd_flags &= ~REQ_URGENT;
874 }
Tatyana Brokhman16349062012-09-20 10:46:10 +0300875 rqueue->rdata->nr_reqs[rq_data_dir(rq)]--;
876}
877
878/*
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200879 * row_get_queue_prio() - Get queue priority for a given request
Tatyana Brokhman16349062012-09-20 10:46:10 +0300880 *
881 * This is a helping function which purpose is to determine what
882 * ROW queue the given request should be added to (and
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200883 * dispatched from later on)
Tatyana Brokhman16349062012-09-20 10:46:10 +0300884 *
Tatyana Brokhman16349062012-09-20 10:46:10 +0300885 */
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200886static enum row_queue_prio row_get_queue_prio(struct request *rq,
887 struct row_data *rd)
Tatyana Brokhman16349062012-09-20 10:46:10 +0300888{
889 const int data_dir = rq_data_dir(rq);
890 const bool is_sync = rq_is_sync(rq);
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200891 enum row_queue_prio q_type = ROWQ_MAX_PRIO;
892 int ioprio_class = IOPRIO_PRIO_CLASS(rq->elv.icq->ioc->ioprio);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300893
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200894 switch (ioprio_class) {
895 case IOPRIO_CLASS_RT:
896 if (data_dir == READ)
897 q_type = ROWQ_PRIO_HIGH_READ;
898 else if (is_sync)
899 q_type = ROWQ_PRIO_HIGH_SWRITE;
900 else {
901 pr_err("%s:%s(): got a simple write from RT_CLASS. How???",
902 rq->rq_disk->disk_name, __func__);
903 q_type = ROWQ_PRIO_REG_WRITE;
904 }
905 break;
906 case IOPRIO_CLASS_IDLE:
907 if (data_dir == READ)
908 q_type = ROWQ_PRIO_LOW_READ;
909 else if (is_sync)
910 q_type = ROWQ_PRIO_LOW_SWRITE;
911 else {
912 pr_err("%s:%s(): got a simple write from IDLE_CLASS. How???",
913 rq->rq_disk->disk_name, __func__);
914 q_type = ROWQ_PRIO_REG_WRITE;
915 }
916 break;
917 case IOPRIO_CLASS_NONE:
918 case IOPRIO_CLASS_BE:
919 default:
920 if (data_dir == READ)
921 q_type = ROWQ_PRIO_REG_READ;
922 else if (is_sync)
923 q_type = ROWQ_PRIO_REG_SWRITE;
924 else
925 q_type = ROWQ_PRIO_REG_WRITE;
926 break;
927 }
928
929 return q_type;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300930}
931
932/*
933 * row_set_request() - Set ROW data structures associated with this request.
934 * @q: requests queue
935 * @rq: pointer to the request
936 * @gfp_mask: ignored
937 *
938 */
939static int
940row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
941{
942 struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
943 unsigned long flags;
944
945 spin_lock_irqsave(q->queue_lock, flags);
946 rq->elv.priv[0] =
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200947 (void *)(&rd->row_queues[row_get_queue_prio(rq, rd)]);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300948 spin_unlock_irqrestore(q->queue_lock, flags);
949
950 return 0;
951}
952
/********** Helping sysfs functions/defenitions for ROW attributes ******/
/* Format a single integer tunable into @page for a sysfs read. */
static ssize_t row_var_show(int var, char *page)
{
	return snprintf(page, 100, "%d\n", var);
}
958
959static ssize_t row_var_store(int *var, const char *page, size_t count)
960{
961 int err;
962 err = kstrtoul(page, 10, (unsigned long *)var);
963
964 return count;
965}
966
/* Generate a sysfs show handler named __FUNC that prints __VAR (an
 * expression evaluated against the elevator's struct row_data). */
#define SHOW_FUNCTION(__FUNC, __VAR)					\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct row_data *rowd = e->elevator_data;			\
	int __data = __VAR;						\
	return row_var_show(__data, (page));				\
}
SHOW_FUNCTION(row_hp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum);
SHOW_FUNCTION(row_rp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum);
SHOW_FUNCTION(row_hp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rp_write_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum);
SHOW_FUNCTION(row_lp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum);
SHOW_FUNCTION(row_lp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rd_idle_data_show, rowd->rd_idle_data.idle_time_ms);
SHOW_FUNCTION(row_rd_idle_data_freq_show, rowd->rd_idle_data.freq_ms);
SHOW_FUNCTION(row_reg_starv_limit_show,
	rowd->reg_prio_starvation.starvation_limit);
SHOW_FUNCTION(row_low_starv_limit_show,
	rowd->low_prio_starvation.starvation_limit);
#undef SHOW_FUNCTION
995
Tatyana Brokhmane9aab612013-03-21 11:04:02 +0200996#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
Tatyana Brokhman16349062012-09-20 10:46:10 +0300997static ssize_t __FUNC(struct elevator_queue *e, \
998 const char *page, size_t count) \
999{ \
1000 struct row_data *rowd = e->elevator_data; \
1001 int __data; \
1002 int ret = row_var_store(&__data, (page), count); \
Tatyana Brokhman16349062012-09-20 10:46:10 +03001003 if (__data < (MIN)) \
1004 __data = (MIN); \
1005 else if (__data > (MAX)) \
1006 __data = (MAX); \
1007 *(__PTR) = __data; \
1008 return ret; \
1009}
1010STORE_FUNCTION(row_hp_read_quantum_store,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001011&rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001012STORE_FUNCTION(row_rp_read_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001013 &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001014 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001015STORE_FUNCTION(row_hp_swrite_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001016 &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001017 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001018STORE_FUNCTION(row_rp_swrite_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001019 &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001020 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001021STORE_FUNCTION(row_rp_write_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001022 &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001023 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001024STORE_FUNCTION(row_lp_read_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001025 &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001026 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001027STORE_FUNCTION(row_lp_swrite_quantum_store,
Tatyana Brokhman0a0345a2012-10-15 20:50:54 +02001028 &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001029 1, INT_MAX);
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +02001030STORE_FUNCTION(row_rd_idle_data_store, &rowd->rd_idle_data.idle_time_ms,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001031 1, INT_MAX);
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +02001032STORE_FUNCTION(row_rd_idle_data_freq_store, &rowd->rd_idle_data.freq_ms,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001033 1, INT_MAX);
Tatyana Brokhmaneec49472013-03-21 13:02:07 +02001034STORE_FUNCTION(row_reg_starv_limit_store,
1035 &rowd->reg_prio_starvation.starvation_limit,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001036 1, INT_MAX);
Tatyana Brokhmaneec49472013-03-21 13:02:07 +02001037STORE_FUNCTION(row_low_starv_limit_store,
1038 &rowd->low_prio_starvation.starvation_limit,
Tatyana Brokhmane9aab612013-03-21 11:04:02 +02001039 1, INT_MAX);
Tatyana Brokhman16349062012-09-20 10:46:10 +03001040
1041#undef STORE_FUNCTION
1042
/* Build a read/write sysfs attribute wired to the generated
 * row_<name>_show / row_<name>_store handlers. */
#define ROW_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \
				      row_##name##_store)

/* Tunables exposed under /sys/block/<dev>/queue/iosched/ */
static struct elv_fs_entry row_attrs[] = {
	ROW_ATTR(hp_read_quantum),
	ROW_ATTR(rp_read_quantum),
	ROW_ATTR(hp_swrite_quantum),
	ROW_ATTR(rp_swrite_quantum),
	ROW_ATTR(rp_write_quantum),
	ROW_ATTR(lp_read_quantum),
	ROW_ATTR(lp_swrite_quantum),
	ROW_ATTR(rd_idle_data),
	ROW_ATTR(rd_idle_data_freq),
	ROW_ATTR(reg_starv_limit),
	ROW_ATTR(low_starv_limit),
	__ATTR_NULL
};
1061
/* Elevator descriptor: operation callbacks, per-request icq sizing and
 * the sysfs attribute table for the "row" scheduler. */
static struct elevator_type iosched_row = {
	.ops = {
		.elevator_merge_req_fn		= row_merged_requests,
		.elevator_dispatch_fn		= row_dispatch_requests,
		.elevator_add_req_fn		= row_add_request,
		.elevator_reinsert_req_fn	= row_reinsert_req,
		.elevator_is_urgent_fn		= row_urgent_pending,
		.elevator_completed_req_fn	= row_completed_req,
		.elevator_former_req_fn		= elv_rb_former_request,
		.elevator_latter_req_fn		= elv_rb_latter_request,
		.elevator_set_req_fn		= row_set_request,
		.elevator_init_fn		= row_init_queue,
		.elevator_exit_fn		= row_exit_queue,
	},
	.icq_size = sizeof(struct io_cq),
	.icq_align = __alignof__(struct io_cq),
	.elevator_attrs = row_attrs,
	.elevator_name = "row",
	.elevator_owner = THIS_MODULE,
};
1082
1083static int __init row_init(void)
1084{
1085 elv_register(&iosched_row);
1086 return 0;
1087}
1088
/* row_exit() - Unregister the ROW elevator on module unload. */
static void __exit row_exit(void)
{
	elv_unregister(&iosched_row);
}
1093
1094module_init(row_init);
1095module_exit(row_exit);
1096
1097MODULE_LICENSE("GPLv2");
1098MODULE_DESCRIPTION("Read Over Write IO scheduler");