blob: dfb46b4271276e1d81377662f5253857f760c458 [file] [log] [blame]
/*
 * ROW (Read Over Write) I/O scheduler.
 *
 * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/* See Documentation/block/row-iosched.txt */
17
18#include <linux/kernel.h>
19#include <linux/fs.h>
20#include <linux/blkdev.h>
21#include <linux/elevator.h>
22#include <linux/bio.h>
23#include <linux/module.h>
24#include <linux/slab.h>
25#include <linux/init.h>
26#include <linux/compiler.h>
27#include <linux/blktrace_api.h>
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +020028#include <linux/hrtimer.h>
Tatyana Brokhman16349062012-09-20 10:46:10 +030029
/*
 * enum row_queue_prio - Priorities of the ROW queues
 *
 * This enum defines the priorities (and the number of queues)
 * the requests will be distributed to. The higher priority -
 * the bigger is the "bus time" (or the dispatch quantum) given
 * to that queue.
 * ROWQ_PRIO_HIGH_READ - is the highest priority queue.
 *
 * Note: the enumerator order is meaningful - the dispatch loops walk
 * the queues from index 0 upwards, and the ROWQ_*_PRIO_IDX defines
 * below slice this range into HIGH/REG/LOW priority classes.
 */
enum row_queue_prio {
	ROWQ_PRIO_HIGH_READ = 0,
	ROWQ_PRIO_HIGH_SWRITE,
	ROWQ_PRIO_REG_READ,
	ROWQ_PRIO_REG_SWRITE,
	ROWQ_PRIO_REG_WRITE,
	ROWQ_PRIO_LOW_READ,
	ROWQ_PRIO_LOW_SWRITE,
	ROWQ_MAX_PRIO,
};
50
/*
 * The following indexes define the distribution of ROW queues according to
 * priorities. Each index defines the first queue in that priority group.
 * A class therefore spans [ROWQ_<X>_PRIO_IDX, <next class index>).
 */
#define ROWQ_HIGH_PRIO_IDX	ROWQ_PRIO_HIGH_READ
#define ROWQ_REG_PRIO_IDX	ROWQ_PRIO_REG_READ
#define ROWQ_LOW_PRIO_IDX	ROWQ_PRIO_LOW_READ
58
/**
 * struct row_queue_params - ROW queue parameters
 * @idling_enabled:	Flag indicating whether idling is enabled on
 *			the queue
 * @quantum:		Number of requests to be dispatched from this queue
 *			in a dispatch cycle
 * @is_urgent:		Flag indicating whether the queue can notify on
 *			urgent requests
 *
 */
struct row_queue_params {
	bool idling_enabled;
	int quantum;
	bool is_urgent;
};
74
/*
 * This array holds the default values of the different configurables
 * for each ROW queue. Each row of the array holds the following values:
 * {idling_enabled, quantum, is_urgent}
 * Each row corresponds to a queue with the same index (according to
 * enum row_queue_prio)
 * Note: The quantums are valid inside their priority type. For example:
 *       For every 10 high priority read requests, 1 high priority sync
 *       write will be dispatched.
 *       For every 100 regular read requests 1 regular write request will
 *       be dispatched.
 */
static const struct row_queue_params row_queues_def[] = {
/* idling_enabled, quantum, is_urgent */
	{true, 10, true},	/* ROWQ_PRIO_HIGH_READ */
	{false, 1, false},	/* ROWQ_PRIO_HIGH_SWRITE */
	{true, 100, true},	/* ROWQ_PRIO_REG_READ */
	{false, 1, false},	/* ROWQ_PRIO_REG_SWRITE */
	{false, 1, false},	/* ROWQ_PRIO_REG_WRITE */
	{false, 1, false},	/* ROWQ_PRIO_LOW_READ */
	{false, 1, false}	/* ROWQ_PRIO_LOW_SWRITE */
};
97
/* Default values for idling on read queues (in msec) */
#define ROW_IDLE_TIME_MSEC 5	/* how long to idle waiting for the next read */
#define ROW_READ_FREQ_MSEC 5	/* max gap between reads that still triggers idling */
Tatyana Brokhman16349062012-09-20 10:46:10 +0300101
/**
 * struct rowq_idling_data - parameters for idling on the queue
 * @last_insert_time:	time the last request was inserted
 *			to the queue
 * @begin_idling:	flag indicating whether we should idle
 *
 */
struct rowq_idling_data {
	ktime_t last_insert_time;
	bool begin_idling;
};
113
/**
 * struct row_queue - requests grouping structure
 * @rdata:		parent row_data structure
 * @fifo:		fifo of requests
 * @prio:		queue priority (enum row_queue_prio)
 * @nr_dispatched:	number of requests already dispatched in
 *			the current dispatch cycle
 * @nr_req:		number of requests in queue
 * @disp_quantum:	number of requests this queue may
 *			dispatch in a dispatch cycle
 * @idle_data:		data for idling on queues
 *
 */
struct row_queue {
	struct row_data *rdata;
	struct list_head fifo;
	enum row_queue_prio prio;

	unsigned int nr_dispatched;

	unsigned int nr_req;
	int disp_quantum;

	/* used only for READ queues */
	struct rowq_idling_data idle_data;
};
140
/**
 * struct idling_data - data for idling on empty rqueue
 * @idle_time_ms:	idling duration (msec)
 * @freq_ms:		min time between two requests that
 *			trigger idling (msec)
 * @hr_timer:		idling timer
 * @idle_work:		the work to be scheduled when idling timer expires
 * @idling_queue_idx:	index of the queue we're idling on
 *			(ROWQ_MAX_PRIO means "not idling")
 *
 */
struct idling_data {
	s64 idle_time_ms;
	s64 freq_ms;

	struct hrtimer hr_timer;
	struct work_struct idle_work;
	enum row_queue_prio idling_queue_idx;
};
159
/**
 * struct starvation_data - data for starvation management
 * @starvation_limit:	number of times this priority class
 *			can tolerate being starved
 * @starvation_counter:	number of requests from higher
 *			priority classes that were dispatched while this
 *			priority request were pending
 *
 */
struct starvation_data {
	int starvation_limit;
	int starvation_counter;
};
173
/**
 * struct row_data - Per block device rqueue structure
 * @dispatch_queue:	dispatch rqueue
 * @row_queues:		array of priority request queues
 * @rd_idle_data:	data for idling after READ request
 * @nr_reqs:		nr_reqs[0] holds the number of all READ requests in
 *			scheduler, nr_reqs[1] holds the number of all WRITE
 *			requests in scheduler
 * @urgent_in_flight:	flag indicating that there is an urgent
 *			request that was dispatched to driver and is yet to
 *			complete.
 * @pending_urgent_rq:	pointer to the pending urgent request
 * @last_served_ioprio_class: I/O priority class that was last dispatched from
 * @reg_prio_starvation: starvation data for REGULAR priority queues
 * @low_prio_starvation: starvation data for LOW priority queues
 * @cycle_flags:	used for marking unserved queues
 *
 */
struct row_data {
	struct request_queue *dispatch_queue;

	struct row_queue row_queues[ROWQ_MAX_PRIO];

	struct idling_data rd_idle_data;
	unsigned int nr_reqs[2];
	bool urgent_in_flight;
	struct request *pending_urgent_rq;
	int last_served_ioprio_class;

/* NOTE(review): "TOLLERANCE" is misspelled but kept - it is a public macro */
#define ROW_REG_STARVATION_TOLLERANCE 5000
	struct starvation_data reg_prio_starvation;
#define ROW_LOW_STARVATION_TOLLERANCE 10000
	struct starvation_data low_prio_starvation;

	unsigned int cycle_flags;
};
210
/* Fetch the row_queue a request was inserted to (stashed in elv.priv[0]) */
#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0]))

/* Scheduler-wide blktrace message, prefixed with the calling function */
#define row_log(q, fmt, args...)   \
	blk_add_trace_msg(q, "%s():" fmt , __func__, ##args)
/* Per-queue blktrace message, prefixed with the queue index */
#define row_log_rowq(rdata, rowq_id, fmt, args...)		\
	blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \
		rowq_id, ##args)
218
219static inline void row_mark_rowq_unserved(struct row_data *rd,
220 enum row_queue_prio qnum)
221{
222 rd->cycle_flags |= (1 << qnum);
223}
224
225static inline void row_clear_rowq_unserved(struct row_data *rd,
226 enum row_queue_prio qnum)
227{
228 rd->cycle_flags &= ~(1 << qnum);
229}
230
231static inline int row_rowq_unserved(struct row_data *rd,
232 enum row_queue_prio qnum)
233{
234 return rd->cycle_flags & (1 << qnum);
235}
236
/* Log each queue's dispatched/pending counters to blktrace. */
static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd)
{
	int i;

	row_log(rd->dispatch_queue, " Queues status:");
	for (i = 0; i < ROWQ_MAX_PRIO; i++)
		row_log(rd->dispatch_queue,
			"queue%d: dispatched= %d, nr_req=%d", i,
			rd->row_queues[i].nr_dispatched,
			rd->row_queues[i].nr_req);
}
248
Tatyana Brokhman16349062012-09-20 10:46:10 +0300249/******************** Static helper functions ***********************/
Tatyana Brokhman16349062012-09-20 10:46:10 +0300250static void kick_queue(struct work_struct *work)
251{
Tatyana Brokhman16349062012-09-20 10:46:10 +0300252 struct idling_data *read_data =
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200253 container_of(work, struct idling_data, idle_work);
254 struct row_data *rd =
255 container_of(read_data, struct row_data, rd_idle_data);
256
257 blk_run_queue(rd->dispatch_queue);
258}
259
260
/*
 * row_idle_hrtimer_fn() - idling timer expiry callback.
 *
 * Ends the idling period on the queue recorded in
 * rd->rd_idle_data.idling_queue_idx and, if any requests are still
 * pending in the scheduler, schedules kick_queue() via kblockd to
 * resume dispatching (blk_run_queue() must not be called from hrtimer
 * context - hence the work indirection).
 */
static enum hrtimer_restart row_idle_hrtimer_fn(struct hrtimer *hr_timer)
{
	struct idling_data *read_data =
		container_of(hr_timer, struct idling_data, hr_timer);
	struct row_data *rd =
		container_of(read_data, struct row_data, rd_idle_data);

	row_log_rowq(rd, rd->rd_idle_data.idling_queue_idx,
			 "Performing delayed work");
	/* Mark idling process as done */
	rd->row_queues[rd->rd_idle_data.idling_queue_idx].
			idle_data.begin_idling = false;
	rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;

	if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE])
		row_log(rd->dispatch_queue, "No requests in scheduler");
	else
		kblockd_schedule_work(rd->dispatch_queue,
			&read_data->idle_work);
	return HRTIMER_NORESTART;
}
282
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200283/*
284 * row_regular_req_pending() - Check if there are REGULAR priority requests
285 * Pending in scheduler
286 * @rd: pointer to struct row_data
287 *
288 * Returns True if there are REGULAR priority requests in scheduler queues.
289 * False, otherwise.
290 */
291static inline bool row_regular_req_pending(struct row_data *rd)
292{
293 int i;
294
295 for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++)
296 if (!list_empty(&rd->row_queues[i].fifo))
297 return true;
298 return false;
299}
300
301/*
302 * row_low_req_pending() - Check if there are LOW priority requests
303 * Pending in scheduler
304 * @rd: pointer to struct row_data
305 *
306 * Returns True if there are LOW priority requests in scheduler queues.
307 * False, otherwise.
308 */
309static inline bool row_low_req_pending(struct row_data *rd)
310{
311 int i;
312
313 for (i = ROWQ_LOW_PRIO_IDX; i < ROWQ_MAX_PRIO; i++)
314 if (!list_empty(&rd->row_queues[i].fifo))
315 return true;
316 return false;
317}
318
Tatyana Brokhman16349062012-09-20 10:46:10 +0300319/******************* Elevator callback functions *********************/
320
/*
 * row_add_request() - Add request to the scheduler
 * @q:	requests queue
 * @rq:	request to add
 *
 * Queues @rq on the tail of its row_queue's fifo, updates the request
 * counters, then (a) decides whether to idle on this queue waiting for
 * a follow-up read, and (b) decides whether to flag @rq as an urgent
 * request to be dispatched ahead of the normal cycle.
 */
static void row_add_request(struct request_queue *q,
			struct request *rq)
{
	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
	struct row_queue *rqueue = RQ_ROWQ(rq);
	s64 diff_ms;
	/* sampled before insertion - used below for the high-prio urgent test */
	bool queue_was_empty = list_empty(&rqueue->fifo);
	unsigned long bv_page_flags = 0;

	if (rq->bio && rq->bio->bi_io_vec && rq->bio->bi_io_vec->bv_page)
		bv_page_flags = rq->bio->bi_io_vec->bv_page->flags;

	list_add_tail(&rq->queuelist, &rqueue->fifo);
	rd->nr_reqs[rq_data_dir(rq)]++;
	rqueue->nr_req++;
	rq_set_fifo_time(rq, jiffies); /* for statistics*/

	/* A request must never arrive already flagged urgent; scrub it */
	if (rq->cmd_flags & REQ_URGENT) {
		WARN_ON(1);
		blk_dump_rq_flags(rq, "");
		rq->cmd_flags &= ~REQ_URGENT;
	}

	if (row_queues_def[rqueue->prio].idling_enabled) {
		/* New work on the queue we are idling on - stop idling */
		if (rd->rd_idle_data.idling_queue_idx == rqueue->prio &&
		    hrtimer_active(&rd->rd_idle_data.hr_timer)) {
			if (hrtimer_try_to_cancel(
				&rd->rd_idle_data.hr_timer) >= 0) {
				row_log_rowq(rd, rqueue->prio,
					"Canceled delayed work on %d",
					rd->rd_idle_data.idling_queue_idx);
				rd->rd_idle_data.idling_queue_idx =
					ROWQ_MAX_PRIO;
			}
		}
		diff_ms = ktime_to_ms(ktime_sub(ktime_get(),
				rqueue->idle_data.last_insert_time));
		if (unlikely(diff_ms < 0)) {
			pr_err("%s(): time delta error: diff_ms < 0",
				__func__);
			rqueue->idle_data.begin_idling = false;
			return;
		}

		/*
		 * Idle if this looks like a sequential read stream: either
		 * a readahead page, or reads arriving faster than freq_ms.
		 */
		if ((bv_page_flags & (1L << PG_readahead)) ||
			(diff_ms < rd->rd_idle_data.freq_ms)) {
			rqueue->idle_data.begin_idling = true;
			row_log_rowq(rd, rqueue->prio, "Enable idling");
		} else {
			rqueue->idle_data.begin_idling = false;
			row_log_rowq(rd, rqueue->prio, "Disable idling (%ldms)",
				(long)diff_ms);
		}

		rqueue->idle_data.last_insert_time = ktime_get();
	}
	/* Only one urgent request may be pending/in flight at a time */
	if (row_queues_def[rqueue->prio].is_urgent &&
	    !rd->pending_urgent_rq && !rd->urgent_in_flight) {
		/* Handle High Priority queues */
		if (rqueue->prio < ROWQ_REG_PRIO_IDX &&
		    rd->last_served_ioprio_class != IOPRIO_CLASS_RT &&
		    queue_was_empty) {
			row_log_rowq(rd, rqueue->prio,
				"added (high prio) urgent request");
			rq->cmd_flags |= REQ_URGENT;
			rd->pending_urgent_rq = rq;
		} else if (row_rowq_unserved(rd, rqueue->prio)) {
			/* Handle Regular priority queues */
			row_log_rowq(rd, rqueue->prio,
				"added urgent request (total on queue=%d)",
				rqueue->nr_req);
			rq->cmd_flags |= REQ_URGENT;
			rd->pending_urgent_rq = rq;
		}
	} else
		row_log_rowq(rd, rqueue->prio,
			"added request (total on queue=%d)", rqueue->nr_req);
}
405
/**
 * row_reinsert_req() - Reinsert request back to the scheduler
 * @q:	requests queue
 * @rq:	request to add
 *
 * Reinsert the given request back to the queue it was
 * dispatched from as if it was never dispatched (head insertion,
 * to keep its original order).
 *
 * Returns 0 on success, error code otherwise
 */
static int row_reinsert_req(struct request_queue *q,
			struct request *rq)
{
	struct row_data *rd = q->elevator->elevator_data;
	struct row_queue *rqueue = RQ_ROWQ(rq);

	if (!rqueue || rqueue->prio >= ROWQ_MAX_PRIO)
		return -EIO;

	list_add(&rq->queuelist, &rqueue->fifo);
	rd->nr_reqs[rq_data_dir(rq)]++;
	rqueue->nr_req++;

	row_log_rowq(rd, rqueue->prio,
		"%s request reinserted (total on queue=%d)",
		(rq_data_dir(rq) == READ ? "READ" : "write"), rqueue->nr_req);

	if (rq->cmd_flags & REQ_URGENT) {
		/*
		 * It's not compliant with the design to re-insert
		 * urgent requests. We want to be able to track this
		 * down.
		 */
		WARN_ON(1);
		if (!rd->urgent_in_flight) {
			pr_err("%s(): no urgent in flight", __func__);
		} else {
			/* The urgent slot is free again; re-arm it with @rq */
			rd->urgent_in_flight = false;
			pr_err("%s(): reinserting URGENT %s req",
				__func__,
				(rq_data_dir(rq) == READ ? "READ" : "WRITE"));
			if (rd->pending_urgent_rq) {
				pr_err("%s(): urgent rq is pending",
					__func__);
				rd->pending_urgent_rq->cmd_flags &= ~REQ_URGENT;
			}
			rd->pending_urgent_rq = rq;
		}
	}
	return 0;
}
457
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200458static void row_completed_req(struct request_queue *q, struct request *rq)
459{
460 struct row_data *rd = q->elevator->elevator_data;
461
462 if (rq->cmd_flags & REQ_URGENT) {
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200463 if (!rd->urgent_in_flight) {
464 WARN_ON(1);
465 pr_err("%s(): URGENT req but urgent_in_flight = F",
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200466 __func__);
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200467 }
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200468 rd->urgent_in_flight = false;
469 rq->cmd_flags &= ~REQ_URGENT;
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200470 }
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200471 row_log(q, "completed %s %s req.",
472 (rq->cmd_flags & REQ_URGENT ? "URGENT" : "regular"),
473 (rq_data_dir(rq) == READ ? "READ" : "WRITE"));
Tatyana Brokhman4c3c3cc2013-01-24 15:08:40 +0200474}
475
/**
 * row_urgent_pending() - Return TRUE if there is an urgent
 *			  request on scheduler
 * @q:	requests queue
 *
 * Returns false while an urgent request is still in flight, so only
 * one urgent request is ever outstanding at a time.
 */
static bool row_urgent_pending(struct request_queue *q)
{
	struct row_data *rd = q->elevator->elevator_data;

	if (rd->urgent_in_flight) {
		/*
		 * NOTE(review): urgent_in_flight is a bool, so this "%d ...
		 * requests" trace always prints 1 - message text is
		 * misleading but harmless; confirm before rewording.
		 */
		row_log(rd->dispatch_queue, "%d urgent requests in flight",
			rd->urgent_in_flight);
		return false;
	}

	if (rd->pending_urgent_rq) {
		row_log(rd->dispatch_queue, "Urgent request pending");
		return true;
	}

	row_log(rd->dispatch_queue, "no urgent request pending/in flight");
	return false;
}
499
/**
 * row_remove_request() - Remove given request from scheduler
 * @rd:	pointer to struct row_data
 * @rq:	request to remove
 *
 * Unlinks @rq from its fifo and updates the counters. If @rq was the
 * pending urgent request, the pending slot is cleared; otherwise @rq
 * must not carry REQ_URGENT (BUG if it does).
 */
static void row_remove_request(struct row_data *rd,
			       struct request *rq)
{
	struct row_queue *rqueue = RQ_ROWQ(rq);

	list_del_init(&(rq)->queuelist);
	if (rd->pending_urgent_rq == rq)
		rd->pending_urgent_rq = NULL;
	else
		BUG_ON(rq->cmd_flags & REQ_URGENT);
	rqueue->nr_req--;
	rd->nr_reqs[rq_data_dir(rq)]--;
}
519
/*
 * row_dispatch_insert() - move request to dispatch queue
 * @rd:	pointer to struct row_data
 * @rq:	the request to dispatch
 *
 * This function moves the given request to the dispatch queue,
 * records which I/O priority class was just served, and updates the
 * starvation counters of the lower classes that are still waiting.
 */
static void row_dispatch_insert(struct row_data *rd, struct request *rq)
{
	struct row_queue *rqueue = RQ_ROWQ(rq);

	row_remove_request(rd, rq);
	elv_dispatch_sort(rd->dispatch_queue, rq);
	if (rq->cmd_flags & REQ_URGENT) {
		WARN_ON(rd->urgent_in_flight);
		rd->urgent_in_flight = true;
	}
	rqueue->nr_dispatched++;
	row_clear_rowq_unserved(rd, rqueue->prio);
	row_log_rowq(rd, rqueue->prio,
		" Dispatched request %p nr_disp = %d", rq,
		rqueue->nr_dispatched);
	if (rqueue->prio < ROWQ_REG_PRIO_IDX) {
		/* Served HIGH: both REG and LOW may be starving */
		rd->last_served_ioprio_class = IOPRIO_CLASS_RT;
		if (row_regular_req_pending(rd))
			rd->reg_prio_starvation.starvation_counter++;
		if (row_low_req_pending(rd))
			rd->low_prio_starvation.starvation_counter++;
	} else if (rqueue->prio < ROWQ_LOW_PRIO_IDX) {
		/* Served REG: REG is no longer starved, LOW may be */
		rd->last_served_ioprio_class = IOPRIO_CLASS_BE;
		rd->reg_prio_starvation.starvation_counter = 0;
		if (row_low_req_pending(rd))
			rd->low_prio_starvation.starvation_counter++;
	} else {
		/* Served LOW: LOW is no longer starved */
		rd->last_served_ioprio_class = IOPRIO_CLASS_IDLE;
		rd->low_prio_starvation.starvation_counter = 0;
	}
}
559
/*
 * row_get_ioprio_class_to_serve() - Return the next I/O priority
 *				      class to dispatch requests from
 * @rd:		pointer to struct row_data
 * @force:	flag indicating if forced dispatch
 *
 * This function returns the next I/O priority class to serve
 * {IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE}.
 * If there are no more requests in scheduler or if we're idling on some queue
 * IOPRIO_CLASS_NONE will be returned.
 * If idling is scheduled on a lower priority queue than the one that needs
 * to be served, it will be canceled.
 * A lower class is promoted over a pending higher class only when its
 * starvation counter has crossed its starvation limit.
 */
static int row_get_ioprio_class_to_serve(struct row_data *rd, int force)
{
	int i;
	int ret = IOPRIO_CLASS_NONE;

	if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE]) {
		row_log(rd->dispatch_queue, "No more requests in scheduler");
		goto check_idling;
	}

	/* First, go over the high priority queues */
	for (i = 0; i < ROWQ_REG_PRIO_IDX; i++) {
		if (!list_empty(&rd->row_queues[i].fifo)) {
			/* RT work pending - stop idling if we can */
			if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
				if (hrtimer_try_to_cancel(
					&rd->rd_idle_data.hr_timer) >= 0) {
					row_log(rd->dispatch_queue,
					"Canceling delayed work on %d. RT pending",
					     rd->rd_idle_data.idling_queue_idx);
					rd->rd_idle_data.idling_queue_idx =
						ROWQ_MAX_PRIO;
				}
			}

			/* Starved lower classes pre-empt the RT class */
			if (row_regular_req_pending(rd) &&
			    (rd->reg_prio_starvation.starvation_counter >=
			     rd->reg_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_BE;
			else if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_RT;

			goto done;
		}
	}

	/*
	 * At the moment idling is implemented only for READ queues.
	 * If enabled on WRITE, this needs updating
	 */
	if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
		row_log(rd->dispatch_queue, "Delayed work pending. Exiting");
		goto done;
	}
check_idling:
	/* Check for (high priority) idling and enable if needed */
	for (i = 0; i < ROWQ_REG_PRIO_IDX && !force; i++) {
		if (rd->row_queues[i].idle_data.begin_idling &&
		    row_queues_def[i].idling_enabled)
			goto initiate_idling;
	}

	/* Regular priority queues */
	for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++) {
		if (list_empty(&rd->row_queues[i].fifo)) {
			/* We can idle only if this is not a forced dispatch */
			if (rd->row_queues[i].idle_data.begin_idling &&
			    !force && row_queues_def[i].idling_enabled)
				goto initiate_idling;
		} else {
			/* Starved LOW class pre-empts the BE class */
			if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_BE;
			goto done;
		}
	}

	/* Only LOW priority requests remain */
	if (rd->nr_reqs[READ] || rd->nr_reqs[WRITE])
		ret = IOPRIO_CLASS_IDLE;
	goto done;

initiate_idling:
	hrtimer_start(&rd->rd_idle_data.hr_timer,
		ktime_set(0, rd->rd_idle_data.idle_time_ms * NSEC_PER_MSEC),
		HRTIMER_MODE_REL);

	rd->rd_idle_data.idling_queue_idx = i;
	row_log_rowq(rd, i, "Scheduled delayed work on %d. exiting", i);

done:
	return ret;
}
662
663static void row_restart_cycle(struct row_data *rd,
664 int start_idx, int end_idx)
665{
666 int i;
667
668 row_dump_queues_stat(rd);
669 for (i = start_idx; i < end_idx; i++) {
670 if (rd->row_queues[i].nr_dispatched <
671 rd->row_queues[i].disp_quantum)
672 row_mark_rowq_unserved(rd, i);
673 rd->row_queues[i].nr_dispatched = 0;
674 }
675 row_log(rd->dispatch_queue, "Restarting cycle for class @ %d-%d",
676 start_idx, end_idx);
677}
678
/*
 * row_get_next_queue() - selects the next queue to dispatch from
 * @q:	requests queue
 * @rd:	pointer to struct row_data
 * @start_idx/end_idx: indexes in the row_queues array to select a queue
 *                 from.
 *
 * Scans [@start_idx, @end_idx) for a non-empty queue with quantum left.
 * If none is found on the first pass, the cycle for this priority class
 * is restarted once (quanta reset) and the scan is repeated.
 *
 * Return index of the queue to dispatch from. -EIO if all queues in the
 * class are empty even after the cycle restart.
 */
static int row_get_next_queue(struct request_queue *q, struct row_data *rd,
				int start_idx, int end_idx)
{
	int i = start_idx;
	bool restart = true;	/* allow at most one cycle restart */
	int ret = -EIO;

	do {
		if (list_empty(&rd->row_queues[i].fifo) ||
		    rd->row_queues[i].nr_dispatched >=
		    rd->row_queues[i].disp_quantum) {
			i++;
			if (i == end_idx && restart) {
				/* Restart cycle for this priority class */
				row_restart_cycle(rd, start_idx, end_idx);
				i = start_idx;
				restart = false;
			}
		} else {
			ret = i;
			break;
		}
	} while (i < end_idx);

	return ret;
}
715
716/*
717 * row_dispatch_requests() - selects the next request to dispatch
718 * @q: requests queue
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200719 * @force: flag indicating if forced dispatch
Tatyana Brokhman16349062012-09-20 10:46:10 +0300720 *
721 * Return 0 if no requests were moved to the dispatch queue.
722 * 1 otherwise
723 *
724 */
static int row_dispatch_requests(struct request_queue *q, int force)
{
	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
	int ret = 0, currq, ioprio_class_to_serve, start_idx, end_idx;

	/*
	 * A forced dispatch must not wait out the read-idle window, so try
	 * to cancel a pending idle timer. hrtimer_try_to_cancel() returns
	 * a negative value only when the callback is currently executing,
	 * in which case the idling state is left for the callback itself.
	 */
	if (force && hrtimer_active(&rd->rd_idle_data.hr_timer)) {
		if (hrtimer_try_to_cancel(&rd->rd_idle_data.hr_timer) >= 0) {
			row_log(rd->dispatch_queue,
				"Canceled delayed work on %d - forced dispatch",
				rd->rd_idle_data.idling_queue_idx);
			rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
		}
	}

	/* A pending urgent request bypasses the priority-class scan */
	if (rd->pending_urgent_rq) {
		row_log(rd->dispatch_queue, "dispatching urgent request");
		row_dispatch_insert(rd, rd->pending_urgent_rq);
		ret = 1;
		goto done;
	}

	ioprio_class_to_serve = row_get_ioprio_class_to_serve(rd, force);
	row_log(rd->dispatch_queue, "Dispatching from %d priority class",
		ioprio_class_to_serve);

	/* Map the chosen I/O priority class to its row_queues[] index range */
	switch (ioprio_class_to_serve) {
	case IOPRIO_CLASS_NONE:
		/* Nothing eligible to dispatch right now */
		rd->last_served_ioprio_class = IOPRIO_CLASS_NONE;
		goto done;
	case IOPRIO_CLASS_RT:
		start_idx = ROWQ_HIGH_PRIO_IDX;
		end_idx = ROWQ_REG_PRIO_IDX;
		break;
	case IOPRIO_CLASS_BE:
		start_idx = ROWQ_REG_PRIO_IDX;
		end_idx = ROWQ_LOW_PRIO_IDX;
		break;
	case IOPRIO_CLASS_IDLE:
		start_idx = ROWQ_LOW_PRIO_IDX;
		end_idx = ROWQ_MAX_PRIO;
		break;
	default:
		pr_err("%s(): Invalid I/O priority class", __func__);
		goto done;
	}

	/* Pick a queue within the class, honoring quanta (may restart cycle) */
	currq = row_get_next_queue(q, rd, start_idx, end_idx);

	/* Dispatch */
	if (currq >= 0) {
		/* Move the oldest request of the chosen queue downstream */
		row_dispatch_insert(rd,
			rq_entry_fifo(rd->row_queues[currq].fifo.next));
		ret = 1;
	}
done:
	return ret;
}
782
783/*
784 * row_init_queue() - Init scheduler data structures
785 * @q: requests queue
786 *
787 * Return pointer to struct row_data to be saved in elevator for
788 * this dispatch queue
789 *
790 */
791static void *row_init_queue(struct request_queue *q)
792{
793
794 struct row_data *rdata;
795 int i;
796
797 rdata = kmalloc_node(sizeof(*rdata),
798 GFP_KERNEL | __GFP_ZERO, q->node);
799 if (!rdata)
800 return NULL;
801
Tatyana Brokhman522778f2013-01-24 16:17:27 +0200802 memset(rdata, 0, sizeof(*rdata));
Tatyana Brokhman16349062012-09-20 10:46:10 +0300803 for (i = 0; i < ROWQ_MAX_PRIO; i++) {
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200804 INIT_LIST_HEAD(&rdata->row_queues[i].fifo);
Tatyana Brokhman9375bcc2013-01-12 16:23:18 +0200805 rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum;
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200806 rdata->row_queues[i].rdata = rdata;
807 rdata->row_queues[i].prio = i;
808 rdata->row_queues[i].idle_data.begin_idling = false;
809 rdata->row_queues[i].idle_data.last_insert_time =
Tatyana Brokhmanbfb04f62012-12-06 13:17:19 +0200810 ktime_set(0, 0);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300811 }
812
Tatyana Brokhmaneec49472013-03-21 13:02:07 +0200813 rdata->reg_prio_starvation.starvation_limit =
814 ROW_REG_STARVATION_TOLLERANCE;
815 rdata->low_prio_starvation.starvation_limit =
816 ROW_LOW_STARVATION_TOLLERANCE;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300817 /*
818 * Currently idling is enabled only for READ queues. If we want to
819 * enable it for write queues also, note that idling frequency will
820 * be the same in both cases
821 */
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200822 rdata->rd_idle_data.idle_time_ms = ROW_IDLE_TIME_MSEC;
823 rdata->rd_idle_data.freq_ms = ROW_READ_FREQ_MSEC;
824 hrtimer_init(&rdata->rd_idle_data.hr_timer,
825 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
826 rdata->rd_idle_data.hr_timer.function = &row_idle_hrtimer_fn;
827
828 INIT_WORK(&rdata->rd_idle_data.idle_work, kick_queue);
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200829 rdata->last_served_ioprio_class = IOPRIO_CLASS_NONE;
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200830 rdata->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300831 rdata->dispatch_queue = q;
832
Tatyana Brokhman16349062012-09-20 10:46:10 +0300833 return rdata;
834}
835
/*
 * row_exit_queue() - called on unloading the ROW scheduler
 * @e: pointer to struct elevator_queue
 *
 */
841static void row_exit_queue(struct elevator_queue *e)
842{
843 struct row_data *rd = (struct row_data *)e->elevator_data;
844 int i;
845
846 for (i = 0; i < ROWQ_MAX_PRIO; i++)
Tatyana Brokhman8a970bc2013-01-12 16:21:12 +0200847 BUG_ON(!list_empty(&rd->row_queues[i].fifo));
Tatyana Brokhmance1a8ed2013-01-17 20:56:07 +0200848 if (hrtimer_cancel(&rd->rd_idle_data.hr_timer))
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200849 pr_err("%s(): idle timer was active!", __func__);
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200850 rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300851 kfree(rd);
852}
853
854/*
855 * row_merged_requests() - Called when 2 requests are merged
856 * @q: requests queue
857 * @rq: request the two requests were merged into
858 * @next: request that was merged
859 */
860static void row_merged_requests(struct request_queue *q, struct request *rq,
861 struct request *next)
862{
863 struct row_queue *rqueue = RQ_ROWQ(next);
864
865 list_del_init(&next->queuelist);
Tatyana Brokhmanbd56be32013-01-13 22:04:59 +0200866 rqueue->nr_req--;
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200867 if (rqueue->rdata->pending_urgent_rq == next) {
868 pr_err("\n\nROW_WARNING: merging pending urgent!");
869 rqueue->rdata->pending_urgent_rq = rq;
870 rq->cmd_flags |= REQ_URGENT;
871 WARN_ON(!(next->cmd_flags & REQ_URGENT));
872 next->cmd_flags &= ~REQ_URGENT;
873 }
Tatyana Brokhman16349062012-09-20 10:46:10 +0300874 rqueue->rdata->nr_reqs[rq_data_dir(rq)]--;
875}
876
877/*
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200878 * row_get_queue_prio() - Get queue priority for a given request
Tatyana Brokhman16349062012-09-20 10:46:10 +0300879 *
880 * This is a helping function which purpose is to determine what
881 * ROW queue the given request should be added to (and
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200882 * dispatched from later on)
Tatyana Brokhman16349062012-09-20 10:46:10 +0300883 *
Tatyana Brokhman16349062012-09-20 10:46:10 +0300884 */
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200885static enum row_queue_prio row_get_queue_prio(struct request *rq,
886 struct row_data *rd)
Tatyana Brokhman16349062012-09-20 10:46:10 +0300887{
888 const int data_dir = rq_data_dir(rq);
889 const bool is_sync = rq_is_sync(rq);
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200890 enum row_queue_prio q_type = ROWQ_MAX_PRIO;
891 int ioprio_class = IOPRIO_PRIO_CLASS(rq->elv.icq->ioc->ioprio);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300892
Tatyana Brokhmandb7c1532013-01-23 17:15:49 +0200893 switch (ioprio_class) {
894 case IOPRIO_CLASS_RT:
895 if (data_dir == READ)
896 q_type = ROWQ_PRIO_HIGH_READ;
897 else if (is_sync)
898 q_type = ROWQ_PRIO_HIGH_SWRITE;
899 else {
900 pr_err("%s:%s(): got a simple write from RT_CLASS. How???",
901 rq->rq_disk->disk_name, __func__);
902 q_type = ROWQ_PRIO_REG_WRITE;
903 }
904 break;
905 case IOPRIO_CLASS_IDLE:
906 if (data_dir == READ)
907 q_type = ROWQ_PRIO_LOW_READ;
908 else if (is_sync)
909 q_type = ROWQ_PRIO_LOW_SWRITE;
910 else {
911 pr_err("%s:%s(): got a simple write from IDLE_CLASS. How???",
912 rq->rq_disk->disk_name, __func__);
913 q_type = ROWQ_PRIO_REG_WRITE;
914 }
915 break;
916 case IOPRIO_CLASS_NONE:
917 case IOPRIO_CLASS_BE:
918 default:
919 if (data_dir == READ)
920 q_type = ROWQ_PRIO_REG_READ;
921 else if (is_sync)
922 q_type = ROWQ_PRIO_REG_SWRITE;
923 else
924 q_type = ROWQ_PRIO_REG_WRITE;
925 break;
926 }
927
928 return q_type;
Tatyana Brokhman16349062012-09-20 10:46:10 +0300929}
930
931/*
932 * row_set_request() - Set ROW data structures associated with this request.
933 * @q: requests queue
934 * @rq: pointer to the request
935 * @gfp_mask: ignored
936 *
937 */
938static int
939row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
940{
941 struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
942 unsigned long flags;
943
944 spin_lock_irqsave(q->queue_lock, flags);
945 rq->elv.priv[0] =
Tatyana Brokhmanfe6fd2f2013-03-12 21:17:18 +0200946 (void *)(&rd->row_queues[row_get_queue_prio(rq, rd)]);
Tatyana Brokhman16349062012-09-20 10:46:10 +0300947 spin_unlock_irqrestore(q->queue_lock, flags);
948
949 return 0;
950}
951
/********** Helper sysfs functions/definitions for ROW attributes ******/
/* Format a single integer tunable, newline-terminated, into @page */
static ssize_t row_var_show(int var, char *page)
{
	ssize_t len;

	len = snprintf(page, 100, "%d\n", var);
	return len;
}
957
958static ssize_t row_var_store(int *var, const char *page, size_t count)
959{
960 int err;
961 err = kstrtoul(page, 10, (unsigned long *)var);
962
963 return count;
964}
965
/*
 * SHOW_FUNCTION() - generate a sysfs "show" handler named __FUNC that
 * reads the row_data field __VAR and formats it via row_var_show().
 * __VAR is evaluated with the local `rowd` in scope.
 */
#define SHOW_FUNCTION(__FUNC, __VAR)					\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct row_data *rowd = e->elevator_data;			\
	int __data = __VAR;						\
	return row_var_show(__data, (page));				\
}
/* One show handler per exported tunable (see row_attrs[]) */
SHOW_FUNCTION(row_hp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum);
SHOW_FUNCTION(row_rp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum);
SHOW_FUNCTION(row_hp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rp_write_quantum_show,
	rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum);
SHOW_FUNCTION(row_lp_read_quantum_show,
	rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum);
SHOW_FUNCTION(row_lp_swrite_quantum_show,
	rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum);
SHOW_FUNCTION(row_rd_idle_data_show, rowd->rd_idle_data.idle_time_ms);
SHOW_FUNCTION(row_rd_idle_data_freq_show, rowd->rd_idle_data.freq_ms);
SHOW_FUNCTION(row_reg_starv_limit_show,
	rowd->reg_prio_starvation.starvation_limit);
SHOW_FUNCTION(row_low_starv_limit_show,
	rowd->low_prio_starvation.starvation_limit);
#undef SHOW_FUNCTION
994
/*
 * STORE_FUNCTION() - generate a sysfs "store" handler named __FUNC that
 * parses an integer from @page via row_var_store(), clamps it to
 * [MIN, MAX] and writes it through __PTR (evaluated with `rowd` in
 * scope). The row_var_store() return value (the byte count) is passed
 * back to sysfs.
 */
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)			\
static ssize_t __FUNC(struct elevator_queue *e,			\
		const char *page, size_t count)			\
{								\
	struct row_data *rowd = e->elevator_data;		\
	int __data;						\
	int ret = row_var_store(&__data, (page), count);	\
	if (__data < (MIN))					\
		__data = (MIN);					\
	else if (__data > (MAX))				\
		__data = (MAX);					\
	*(__PTR) = __data;					\
	return ret;						\
}
/* One store handler per exported tunable; all clamp to [1, INT_MAX] */
STORE_FUNCTION(row_hp_read_quantum_store,
&rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX);
STORE_FUNCTION(row_rp_read_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_hp_swrite_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_rp_swrite_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_rp_write_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_lp_read_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_lp_swrite_quantum_store,
			&rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum,
			1, INT_MAX);
STORE_FUNCTION(row_rd_idle_data_store, &rowd->rd_idle_data.idle_time_ms,
			1, INT_MAX);
STORE_FUNCTION(row_rd_idle_data_freq_store, &rowd->rd_idle_data.freq_ms,
			1, INT_MAX);
STORE_FUNCTION(row_reg_starv_limit_store,
			&rowd->reg_prio_starvation.starvation_limit,
			1, INT_MAX);
STORE_FUNCTION(row_low_starv_limit_store,
			&rowd->low_prio_starvation.starvation_limit,
			1, INT_MAX);

#undef STORE_FUNCTION
1041
/*
 * ROW_ATTR() - declare one read/write (0644) sysfs attribute whose
 * handlers follow the row_<name>_show / row_<name>_store naming scheme.
 */
#define ROW_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \
				      row_##name##_store)

/* Tunables exported under /sys/block/<dev>/queue/iosched/ */
static struct elv_fs_entry row_attrs[] = {
	ROW_ATTR(hp_read_quantum),
	ROW_ATTR(rp_read_quantum),
	ROW_ATTR(hp_swrite_quantum),
	ROW_ATTR(rp_swrite_quantum),
	ROW_ATTR(rp_write_quantum),
	ROW_ATTR(lp_read_quantum),
	ROW_ATTR(lp_swrite_quantum),
	ROW_ATTR(rd_idle_data),
	ROW_ATTR(rd_idle_data_freq),
	ROW_ATTR(reg_starv_limit),
	ROW_ATTR(low_starv_limit),
	__ATTR_NULL	/* sentinel */
};
1060
/* Elevator descriptor registering ROW's callbacks with the block layer */
static struct elevator_type iosched_row = {
	.ops = {
		.elevator_merge_req_fn = row_merged_requests,
		.elevator_dispatch_fn = row_dispatch_requests,
		.elevator_add_req_fn = row_add_request,
		.elevator_reinsert_req_fn = row_reinsert_req,
		.elevator_is_urgent_fn = row_urgent_pending,
		.elevator_completed_req_fn = row_completed_req,
		.elevator_former_req_fn = elv_rb_former_request,
		.elevator_latter_req_fn = elv_rb_latter_request,
		.elevator_set_req_fn = row_set_request,
		.elevator_init_fn = row_init_queue,
		.elevator_exit_fn = row_exit_queue,
	},
	/* ROW keeps no private per-icq state beyond the base io_cq */
	.icq_size = sizeof(struct io_cq),
	.icq_align = __alignof__(struct io_cq),
	.elevator_attrs = row_attrs,
	.elevator_name = "row",
	.elevator_owner = THIS_MODULE,
};
1081
/* Module init: register the ROW elevator with the block layer */
static int __init row_init(void)
{
	elv_register(&iosched_row);
	return 0;
}
1087
/* Module exit: unregister the ROW elevator */
static void __exit row_exit(void)
{
	elv_unregister(&iosched_row);
}
1092
1093module_init(row_init);
1094module_exit(row_exit);
1095
1096MODULE_LICENSE("GPLv2");
1097MODULE_DESCRIPTION("Read Over Write IO scheduler");