/*
 *  linux/drivers/block/cfq-iosched.c
 *
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>
#include <linux/writeback.h>

/*
 * tunables
 */
static int cfq_quantum = 4;		/* max queue in one round of service */
static int cfq_queued = 8;		/* minimum rq allocate limit per-queue */
static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static int cfq_back_penalty = 2;	/* penalty of a backwards seek */

static int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 100;

#define CFQ_IDLE_GRACE		(HZ / 10)
#define CFQ_SLICE_SCALE		(5)

#define CFQ_KEY_ASYNC		(0)
#define CFQ_KEY_ANY		(0xffff)

/*
 * disable queueing at the driver/hardware level
 */
static int cfq_max_depth = 1;

/*
 * for the hash of cfqq inside the cfqd
 */
#define CFQ_QHASH_SHIFT		6
#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)

/*
 * for the hash of crq inside the cfqq
 */
#define CFQ_MHASH_SHIFT		6
#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
#define CFQ_MHASH_FN(sec)	hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)

#define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)

#define RQ_DATA(rq)		(rq)->elevator_private

/*
 * rb-tree defines
 */
#define RB_NONE			(2)
#define RB_EMPTY(node)		((node)->rb_node == NULL)
#define RB_CLEAR_COLOR(node)	(node)->rb_color = RB_NONE
#define RB_CLEAR(node)		do {	\
	(node)->rb_parent = NULL;	\
	RB_CLEAR_COLOR((node));		\
	(node)->rb_right = NULL;	\
	(node)->rb_left = NULL;		\
} while (0)
#define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
#define ON_RB(node)		((node)->rb_color != RB_NONE)
#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq)		(rq)->sector

static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

#define ASYNC			(0)
#define SYNC			(1)

#define cfq_cfqq_dispatched(cfqq)	\
	((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])

#define cfq_cfqq_class_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)

#define cfq_cfqq_sync(cfqq)		\
	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])

/*
 * Per block device queue structure
 */
struct cfq_data {
	atomic_t ref;
	request_queue_t *queue;

	/*
	 * rr list of queues with requests and the count of them
	 */
	struct list_head rr_list[CFQ_PRIO_LISTS];
	struct list_head busy_rr;
	struct list_head cur_rr;
	struct list_head idle_rr;
	unsigned int busy_queues;

	/*
	 * non-ordered list of empty cfqq's
	 */
	struct list_head empty_list;

	/*
	 * cfqq lookup hash
	 */
	struct hlist_head *cfq_hash;

	/*
	 * global crq hash for all queues
	 */
	struct hlist_head *crq_hash;

	unsigned int max_queued;

	mempool_t *crq_pool;

	int rq_in_driver;

	/*
	 * schedule slice state info
	 */
	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;
	int cur_prio, cur_end_prio;
	unsigned int dispatch_slice;

	struct timer_list idle_class_timer;

	sector_t last_sector;
	unsigned long last_end_request;

	unsigned int rq_starved;

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
	unsigned int cfq_queued;
	unsigned int cfq_fifo_expire[2];
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;
	unsigned int cfq_max_depth;
};

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* cfqq lookup hash */
	struct hlist_node cfq_hash;
	/* hash key */
	unsigned int key;
	/* on either rr or empty list of cfqd */
	struct list_head cfq_list;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct cfq_rq *next_crq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_start;
	unsigned long slice_end;
	unsigned long slice_left;
	unsigned long service_last;

	/* number of requests that are on the dispatch list */
	int on_dispatch[2];

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/* various state flags, see below */
	unsigned int flags;
};

struct cfq_rq {
	struct rb_node rb_node;
	sector_t rb_key;
	struct request *request;
	struct hlist_node hash;

	struct cfq_queue *cfq_queue;
	struct cfq_io_context *io_context;

	unsigned int crq_flags;
};

enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,
	CFQ_CFQQ_FLAG_wait_request,
	CFQ_CFQQ_FLAG_must_alloc,
	CFQ_CFQQ_FLAG_must_alloc_slice,
	CFQ_CFQQ_FLAG_must_dispatch,
	CFQ_CFQQ_FLAG_fifo_expire,
	CFQ_CFQQ_FLAG_idle_window,
	CFQ_CFQQ_FLAG_prio_changed,
	CFQ_CFQQ_FLAG_expired,
};

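/*
 * Generate cfq_mark_cfqq_foo(), cfq_clear_cfqq_foo() and cfq_cfqq_foo()
 * helpers for each of the state flags above, implemented as set/clear/test
 * bit operations on cfqq->flags.
 */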
#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(expired);
#undef CFQ_CFQQ_FNS

enum cfq_rq_state_flags {
	CFQ_CRQ_FLAG_in_flight = 0,
	CFQ_CRQ_FLAG_in_driver,
	CFQ_CRQ_FLAG_is_sync,
	CFQ_CRQ_FLAG_requeued,
};

#define CFQ_CRQ_FNS(name)						\
static inline void cfq_mark_crq_##name(struct cfq_rq *crq)		\
{									\
	crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name);			\
}									\
static inline void cfq_clear_crq_##name(struct cfq_rq *crq)		\
{									\
	crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name);			\
}									\
static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
{									\
	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
}

CFQ_CRQ_FNS(in_flight);
CFQ_CRQ_FNS(in_driver);
CFQ_CRQ_FNS(is_sync);
CFQ_CRQ_FNS(requeued);
#undef CFQ_CRQ_FNS

static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
static void cfq_put_cfqd(struct cfq_data *cfqd);
static inline int cfq_pending_requests(struct cfq_data *cfqd);

#define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)

/*
 * lots of deadline iosched dupes, can be abstracted later...
 */
static inline void cfq_del_crq_hash(struct cfq_rq *crq)
{
	hlist_del_init(&crq->hash);
}

static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
{
	cfq_del_crq_hash(crq);

	if (q->last_merge == crq->request)
		q->last_merge = NULL;
}

static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));

	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
}

static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
{
	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
	struct hlist_node *entry, *next;

	hlist_for_each_safe(entry, next, hash_list) {
		struct cfq_rq *crq = list_entry_hash(entry);
		struct request *__rq = crq->request;

		if (!rq_mergeable(__rq)) {
			cfq_del_crq_hash(crq);
			continue;
		}

		if (rq_hash_key(__rq) == offset)
			return __rq;
	}

	return NULL;
}

/*
 * Lifted from AS - choose which of crq1 and crq2 is best served now.
 * We choose the request that is closest to the head right now. Distances
 * behind the head are penalized and only allowed to a certain extent.
 */
static struct cfq_rq *
cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	int r1_wrap = 0, r2_wrap = 0;	/* requests are behind the disk head */
	unsigned long back_max;

	if (crq1 == NULL || crq1 == crq2)
		return crq2;
	if (crq2 == NULL)
		return crq1;
	if (cfq_crq_requeued(crq1))
		return crq1;
	if (cfq_crq_requeued(crq2))
		return crq2;

	s1 = crq1->request->sector;
	s2 = crq2->request->sector;

	last = cfqd->last_sector;

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
		r1_wrap = 1;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
		r2_wrap = 1;

	/* Found required data */
	if (!r1_wrap && r2_wrap)
		return crq1;
	else if (!r2_wrap && r1_wrap)
		return crq2;
	else if (r1_wrap && r2_wrap) {
		/* both behind the head */
		if (s1 <= s2)
			return crq1;
		else
			return crq2;
	}

	/* Both requests in front of the head */
	if (d1 < d2)
		return crq1;
	else if (d2 < d1)
		return crq2;
	else {
		if (s1 >= s2)
			return crq1;
		else
			return crq2;
	}
}

/*
 * would be nice to take fifo expire time into account as well
 */
static struct cfq_rq *
cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct cfq_rq *last)
{
	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
	struct rb_node *rbnext, *rbprev;

	rbnext = NULL;
	if (ON_RB(&last->rb_node))
		rbnext = rb_next(&last->rb_node);
	if (!rbnext) {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext == &last->rb_node)
			rbnext = NULL;
	}

	rbprev = rb_prev(&last->rb_node);

	if (rbprev)
		crq_prev = rb_entry_crq(rbprev);
	if (rbnext)
		crq_next = rb_entry_crq(rbnext);

	return cfq_choose_req(cfqd, crq_next, crq_prev);
}

static void cfq_update_next_crq(struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;

	if (cfqq->next_crq == crq)
		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
}

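/*
 * Requeue cfqq on the round-robin list matching its class and state: RT
 * queues go to cur_rr, idle class queues to idle_rr, queues with requests
 * still in flight to busy_rr, and the remaining best-effort queues to the
 * rr_list slot for their io priority.
 */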
static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct list_head *list, *entry;

	BUG_ON(!cfq_cfqq_on_rr(cfqq));

	list_del(&cfqq->cfq_list);

	if (cfq_class_rt(cfqq))
		list = &cfqd->cur_rr;
	else if (cfq_class_idle(cfqq))
		list = &cfqd->idle_rr;
	else {
		/*
		 * if cfqq has requests in flight, don't allow it to be
		 * found in cfq_set_active_queue before it has finished them.
		 * this is done to increase fairness between a process that
		 * has lots of io pending vs one that only generates one
		 * sporadically or synchronously
		 */
		if (cfq_cfqq_dispatched(cfqq))
			list = &cfqd->busy_rr;
		else
			list = &cfqd->rr_list[cfqq->ioprio];
	}

	/*
	 * if queue was preempted, just add to front to be fair. busy_rr
	 * isn't sorted.
	 */
	if (preempted || list == &cfqd->busy_rr) {
		list_add(&cfqq->cfq_list, list);
		return;
	}

	/*
	 * sort by when queue was last serviced
	 */
	entry = list;
	while ((entry = entry->prev) != list) {
		struct cfq_queue *__cfqq = list_entry_cfqq(entry);

		if (!__cfqq->service_last)
			break;
		if (time_before(__cfqq->service_last, cfqq->service_last))
			break;
	}

	list_add(&cfqq->cfq_list, entry);
}

/*
 * add to busy list of queues for service, trying to be fair in ordering
 * the pending list according to last request service
 */
static inline void
cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
{
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;

	cfq_resort_rr_list(cfqq, requeue);
}

static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);
	list_move(&cfqq->cfq_list, &cfqd->empty_list);

	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
}

/*
 * rb tree support functions
 */
static inline void cfq_del_crq_rb(struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;

	if (ON_RB(&crq->rb_node)) {
		struct cfq_data *cfqd = cfqq->cfqd;
		const int sync = cfq_crq_is_sync(crq);

		BUG_ON(!cfqq->queued[sync]);
		cfqq->queued[sync]--;

		cfq_update_next_crq(crq);

		rb_erase(&crq->rb_node, &cfqq->sort_list);
		RB_CLEAR_COLOR(&crq->rb_node);

		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
			cfq_del_cfqq_rr(cfqd, cfqq);
	}
}

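/*
 * rbtree insert keyed by sector. If an entry with the same key already
 * exists, the new node is not linked and the existing entry is returned
 * as an alias for the caller to resolve.
 */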
static struct cfq_rq *
__cfq_add_crq_rb(struct cfq_rq *crq)
{
	struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
	struct rb_node *parent = NULL;
	struct cfq_rq *__crq;

	while (*p) {
		parent = *p;
		__crq = rb_entry_crq(parent);

		if (crq->rb_key < __crq->rb_key)
			p = &(*p)->rb_left;
		else if (crq->rb_key > __crq->rb_key)
			p = &(*p)->rb_right;
		else
			return __crq;
	}

	rb_link_node(&crq->rb_node, parent, p);
	return NULL;
}

static void cfq_add_crq_rb(struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *rq = crq->request;
	struct cfq_rq *__alias;

	crq->rb_key = rq_rb_key(rq);
	cfqq->queued[cfq_crq_is_sync(crq)]++;

	/*
	 * looks a little odd, but the first insert might return an alias.
	 * if that happens, put the alias on the dispatch list
	 */
	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
		cfq_dispatch_sort(cfqd->queue, __alias);

	rb_insert_color(&crq->rb_node, &cfqq->sort_list);

	if (!cfq_cfqq_on_rr(cfqq))
		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));

	/*
	 * check if this request is a better next-serve candidate
	 */
	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
}

static inline void
cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
	if (ON_RB(&crq->rb_node)) {
		rb_erase(&crq->rb_node, &cfqq->sort_list);
		cfqq->queued[cfq_crq_is_sync(crq)]--;
	}

	cfq_add_crq_rb(crq);
}

static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
{
	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
	struct rb_node *n;

	if (!cfqq)
		goto out;

	n = cfqq->sort_list.rb_node;
	while (n) {
		struct cfq_rq *crq = rb_entry_crq(n);

		if (sector < crq->rb_key)
			n = n->rb_left;
		else if (sector > crq->rb_key)
			n = n->rb_right;
		else
			return crq->request;
	}

out:
	return NULL;
}

static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_rq *crq = RQ_DATA(rq);

	if (crq) {
		struct cfq_queue *cfqq = crq->cfq_queue;

		if (cfq_crq_in_driver(crq)) {
			cfq_clear_crq_in_driver(crq);
			WARN_ON(!cfqd->rq_in_driver);
			cfqd->rq_in_driver--;
		}
		if (cfq_crq_in_flight(crq)) {
			const int sync = cfq_crq_is_sync(crq);

			cfq_clear_crq_in_flight(crq);
			WARN_ON(!cfqq->on_dispatch[sync]);
			cfqq->on_dispatch[sync]--;
		}
		cfq_mark_crq_requeued(crq);
	}
}

/*
 * make sure the service time gets corrected on reissue of this request
 */
static void cfq_requeue_request(request_queue_t *q, struct request *rq)
{
	cfq_deactivate_request(q, rq);
	list_add(&rq->queuelist, &q->queue_head);
}

static void cfq_remove_request(request_queue_t *q, struct request *rq)
{
	struct cfq_rq *crq = RQ_DATA(rq);

	if (crq) {
		list_del_init(&rq->queuelist);
		cfq_del_crq_rb(crq);
		cfq_remove_merge_hints(q, crq);
	}
}

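/*
 * See if a bio can be merged into an existing request: a back merge
 * candidate is found through the crq hash (keyed by request end sector),
 * a front merge candidate through the per-process rbtree (keyed by start
 * sector).
 */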
static int
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *__rq;
	int ret;

	ret = elv_try_last_merge(q, bio);
	if (ret != ELEVATOR_NO_MERGE) {
		__rq = q->last_merge;
		goto out_insert;
	}

	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		ret = ELEVATOR_BACK_MERGE;
		goto out;
	}

	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		ret = ELEVATOR_FRONT_MERGE;
		goto out;
	}

	return ELEVATOR_NO_MERGE;
out:
	q->last_merge = __rq;
out_insert:
	*req = __rq;
	return ret;
}

static void cfq_merged_request(request_queue_t *q, struct request *req)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_rq *crq = RQ_DATA(req);

	cfq_del_crq_hash(crq);
	cfq_add_crq_hash(cfqd, crq);

	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
		struct cfq_queue *cfqq = crq->cfq_queue;

		cfq_update_next_crq(crq);
		cfq_reposition_crq_rb(cfqq, crq);
	}

	q->last_merge = req;
}

static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
		    struct request *next)
{
	cfq_merged_request(q, rq);

	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

	cfq_remove_request(q, next);
}

static inline void
__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	if (cfqq) {
		/*
		 * stop potential idle class queues waiting service
		 */
		del_timer(&cfqd->idle_class_timer);

		cfqq->slice_start = jiffies;
		cfqq->slice_end = 0;
		cfqq->slice_left = 0;
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
		cfq_clear_cfqq_expired(cfqq);
	}

	cfqd->active_queue = cfqq;
}

/*
 * Scan the priority levels in a widening pattern, so that the lowest
 * (most important) levels are visited on every pass while the highest
 * level is only reached once per full cycle:
 *
 * 0
 * 0,1
 * 0,1,2
 * 0,1,2,3
 * 0,1,2,3,4
 * 0,1,2,3,4,5
 * 0,1,2,3,4,5,6
 * 0,1,2,3,4,5,6,7
 */
static int cfq_get_next_prio_level(struct cfq_data *cfqd)
{
	int prio, wrap;

	prio = -1;
	wrap = 0;
	do {
		int p;

		for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
			if (!list_empty(&cfqd->rr_list[p])) {
				prio = p;
				break;
			}
		}

		if (prio != -1)
			break;
		cfqd->cur_prio = 0;
		if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
			cfqd->cur_end_prio = 0;
			if (wrap)
				break;
			wrap = 1;
		}
	} while (1);

	if (unlikely(prio == -1))
		return -1;

	BUG_ON(prio >= CFQ_PRIO_LISTS);

	list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);

	cfqd->cur_prio = prio + 1;
	if (cfqd->cur_prio > cfqd->cur_end_prio) {
		cfqd->cur_end_prio = cfqd->cur_prio;
		cfqd->cur_prio = 0;
	}
	if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
		cfqd->cur_prio = 0;
		cfqd->cur_end_prio = 0;
	}

	return prio;
}

static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq;

	/*
	 * if current queue is expired but not done with its requests yet,
	 * wait for that to happen
	 */
	if ((cfqq = cfqd->active_queue) != NULL) {
		if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq))
			return NULL;
	}

	/*
	 * if current list is non-empty, grab first entry. if it is empty,
	 * get next prio level and grab first entry then if any are spliced
	 */
	if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
		cfqq = list_entry_cfqq(cfqd->cur_rr.next);

	/*
	 * if we have idle queues and no rt or be queues had pending
	 * requests, either allow immediate service if the grace period
	 * has passed or arm the idle grace timer
	 */
	if (!cfqq && !list_empty(&cfqd->idle_rr)) {
		unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;

		if (time_after_eq(jiffies, end))
			cfqq = list_entry_cfqq(cfqd->idle_rr.next);
		else
			mod_timer(&cfqd->idle_class_timer, end);
	}

	__cfq_set_active_queue(cfqd, cfqq);
	return cfqq;
}

/*
 * current cfqq expired its slice (or was too idle), select new one
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		    int preempted)
{
	unsigned long now = jiffies;

	if (cfq_cfqq_wait_request(cfqq))
		del_timer(&cfqd->idle_slice_timer);

	if (!preempted && !cfq_cfqq_dispatched(cfqq))
		cfqq->service_last = now;

	cfq_clear_cfqq_must_dispatch(cfqq);
	cfq_clear_cfqq_wait_request(cfqq);

	/*
	 * store what was left of this slice, if the queue idled out
	 * or was preempted
	 */
	if (time_after(cfqq->slice_end, now))
		cfqq->slice_left = cfqq->slice_end - now;
	else
		cfqq->slice_left = 0;

	if (cfq_cfqq_on_rr(cfqq))
		cfq_resort_rr_list(cfqq, preempted);

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->ioc);
		cfqd->active_cic = NULL;
	}

	cfqd->dispatch_slice = 0;
}

static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfqq) {
		/*
		 * use deferred expiry if there are requests in progress, so
		 * as not to disturb the slice of the next queue
		 */
		if (cfq_cfqq_dispatched(cfqq))
			cfq_mark_cfqq_expired(cfqq);
		else
			__cfq_slice_expired(cfqd, cfqq, preempted);
	}
}

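/*
 * If a sync queue runs out of requests while it still has slice time
 * left, arm the idle window timer instead of switching queues, in the
 * hope that the process issues another nearby request shortly.
 */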
static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
	WARN_ON(cfqq != cfqd->active_queue);

	/*
	 * idle is disabled, either manually or by past process history
	 */
	if (!cfqd->cfq_slice_idle)
		return 0;
	if (!cfq_cfqq_idle_window(cfqq))
		return 0;
	/*
	 * task has exited, don't wait
	 */
	if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
		return 0;

	cfq_mark_cfqq_must_dispatch(cfqq);
	cfq_mark_cfqq_wait_request(cfqq);

	if (!timer_pending(&cfqd->idle_slice_timer)) {
		unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);

		cfqd->idle_slice_timer.expires = jiffies + slice_left;
		add_timer(&cfqd->idle_slice_timer);
	}

	return 1;
}

/*
 * We dispatch cfqd->cfq_quantum requests in total from the rr_list queues;
 * this function sector-sorts the selected request into the dispatch list
 * to minimize seeks. We start at cfqd->last_sector, not 0.
 */
static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = crq->cfq_queue;
	struct list_head *head = &q->queue_head, *entry = head;
	struct request *__rq;
	sector_t last;

	list_del(&crq->request->queuelist);

	last = cfqd->last_sector;
	list_for_each_entry_reverse(__rq, head, queuelist) {
		struct cfq_rq *__crq = RQ_DATA(__rq);

		if (blk_barrier_rq(__rq))
			break;
		if (!blk_fs_request(__rq))
			break;
		if (cfq_crq_requeued(__crq))
			break;

		if (__rq->sector <= crq->request->sector)
			break;
		if (__rq->sector > last && crq->request->sector < last) {
			last = crq->request->sector + crq->request->nr_sectors;
			break;
		}
		entry = &__rq->queuelist;
	}

	cfqd->last_sector = last;

	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);

	cfq_del_crq_rb(crq);
	cfq_remove_merge_hints(q, crq);

	cfq_mark_crq_in_flight(crq);
	cfq_clear_crq_requeued(crq);

	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
	list_add_tail(&crq->request->queuelist, entry);
}

/*
 * return expired entry, or NULL to just start from scratch in rbtree
 */
static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *rq;
	struct cfq_rq *crq;

	if (cfq_cfqq_fifo_expire(cfqq))
		return NULL;

	if (!list_empty(&cfqq->fifo)) {
		int fifo = cfq_cfqq_class_sync(cfqq);

		crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
		rq = crq->request;
		if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
			cfq_mark_cfqq_fifo_expire(cfqq);
			return crq;
		}
	}

	return NULL;
}
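/*
 * Note that cfq_mark_cfqq_fifo_expire() latches: at most one expired
 * request is pulled off the fifo per slice, the flag is cleared again
 * in __cfq_set_active_queue().
 */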

/*
 * Scale schedule slice based on io priority. Use the sync time slice only
 * if a queue is marked sync and has sync io queued. A sync queue with async
 * io only should not get the full sync slice length.
 */
static inline int
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

	return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
}
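/*
 * e.g. with HZ=1000 and the default cfq_slice_sync = HZ/10, base_slice is
 * 100 jiffies: a sync queue at ioprio 0 gets 100 + 20*4 = 180 jiffies per
 * slice, ioprio 4 gets exactly 100, and ioprio 7 gets 100 - 20*3 = 40.
 */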

static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
}

static inline int
cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	const int base_rq = cfqd->cfq_slice_async_rq;

	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);

	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
}
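/*
 * e.g. with the default cfq_slice_async_rq = 2, an async queue at ioprio 0
 * may dispatch up to 2 * (2 + 2*7) = 32 requests in one slice, while
 * ioprio 7 is capped at 2 * (2 + 2*0) = 4.
 */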

/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
		kblockd_schedule_work(&cfqd->unplug_work);
}

/*
 * get next queue for service
 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
{
	unsigned long now = jiffies;
	struct cfq_queue *cfqq;

	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;

	if (cfq_cfqq_expired(cfqq))
		goto new_queue;

	/*
	 * slice has expired
	 */
	if (!cfq_cfqq_must_dispatch(cfqq) && time_after(now, cfqq->slice_end))
		goto expire;

	/*
	 * if queue has requests, dispatch one. if not, check if
	 * enough slice is left to wait for one
	 */
	if (!RB_EMPTY(&cfqq->sort_list))
		goto keep_queue;
	else if (!force && cfq_cfqq_class_sync(cfqq) &&
		 time_before(now, cfqq->slice_end)) {
		if (cfq_arm_slice_timer(cfqd, cfqq))
			return NULL;
	}

expire:
	cfq_slice_expired(cfqd, 0);
new_queue:
	cfqq = cfq_set_active_queue(cfqd);
keep_queue:
	return cfqq;
}

static int
__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			int max_dispatch)
{
	int dispatched = 0;

	BUG_ON(RB_EMPTY(&cfqq->sort_list));

	do {
		struct cfq_rq *crq;

		/*
		 * follow expired path, else get first next available
		 */
		if ((crq = cfq_check_fifo(cfqq)) == NULL)
			crq = cfqq->next_crq;

		/*
		 * finally, insert request into driver dispatch list
		 */
		cfq_dispatch_sort(cfqd->queue, crq);

		cfqd->dispatch_slice++;
		dispatched++;

		if (!cfqd->active_cic) {
			atomic_inc(&crq->io_context->ioc->refcount);
			cfqd->active_cic = crq->io_context;
		}

		if (RB_EMPTY(&cfqq->sort_list))
			break;

	} while (dispatched < max_dispatch);

	/*
	 * if slice end isn't set yet, set it. if at least one request was
	 * sync, use the sync time slice value
	 */
	if (!cfqq->slice_end)
		cfq_set_prio_slice(cfqd, cfqq);

	/*
	 * expire an async queue immediately if it has used up its slice. idle
	 * queues always expire after 1 dispatch round.
	 */
	if ((!cfq_cfqq_sync(cfqq) &&
	    cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
	    cfq_class_idle(cfqq))
		cfq_slice_expired(cfqd, 0);

	return dispatched;
}

static int
cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq;

	if (!cfqd->busy_queues)
		return 0;

	cfqq = cfq_select_queue(cfqd, force);
	if (cfqq) {
		cfq_clear_cfqq_must_dispatch(cfqq);
		cfq_clear_cfqq_wait_request(cfqq);
		del_timer(&cfqd->idle_slice_timer);

		if (cfq_class_idle(cfqq))
			max_dispatch = 1;

		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
	}

	return 0;
}

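/*
 * bookkeeping for a request handed to the driver. Must only count each
 * request once, even though some drivers peek at the head request via
 * elv_next_request() several times before starting it.
 */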
static inline void cfq_account_dispatch(struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;
	struct cfq_data *cfqd = cfqq->cfqd;

	if (unlikely(!blk_fs_request(crq->request)))
		return;

	/*
	 * accounted bit is necessary since some drivers will call
	 * elv_next_request() many times for the same request (eg ide)
	 */
	if (cfq_crq_in_driver(crq))
		return;

	cfq_mark_crq_in_driver(crq);
	cfqd->rq_in_driver++;
}

static inline void
cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	unsigned long now;

	if (!cfq_crq_in_driver(crq))
		return;

	now = jiffies;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;

	if (!cfq_class_idle(cfqq))
		cfqd->last_end_request = now;

	if (!cfq_cfqq_dispatched(cfqq)) {
		if (cfq_cfqq_on_rr(cfqq)) {
			cfqq->service_last = now;
			cfq_resort_rr_list(cfqq, 0);
		}
		if (cfq_cfqq_expired(cfqq)) {
			__cfq_slice_expired(cfqd, cfqq, 0);
			cfq_schedule_dispatch(cfqd);
		}
	}

	if (cfq_crq_is_sync(crq))
		crq->io_context->last_end_request = now;
}

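/*
 * elevator "next request" hook: hand out the head of the dispatch list
 * if one exists, otherwise have cfq_dispatch_requests() refill it from
 * the currently active queue.
 */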
static struct request *cfq_next_request(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request *rq;

	if (!list_empty(&q->queue_head)) {
		struct cfq_rq *crq;
dispatch:
		rq = list_entry_rq(q->queue_head.next);

		crq = RQ_DATA(rq);
		if (crq) {
			struct cfq_queue *cfqq = crq->cfq_queue;

			/*
			 * if idle window is disabled, allow queue buildup
			 */
			if (!cfq_crq_in_driver(crq) &&
			    !cfq_cfqq_idle_window(cfqq) &&
			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
				return NULL;

			cfq_remove_merge_hints(q, crq);
			cfq_account_dispatch(crq);
		}

		return rq;
	}

	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
		goto dispatch;

	return NULL;
}

/*
 * task holds one reference to the queue, dropped when task exits. each crq
 * in-flight on this queue also holds a reference, dropped when crq is freed.
 *
 * queue lock must be held here.
 */
static void cfq_put_queue(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;

	BUG_ON(atomic_read(&cfqq->ref) <= 0);

	if (!atomic_dec_and_test(&cfqq->ref))
		return;

	BUG_ON(rb_first(&cfqq->sort_list));
	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
	BUG_ON(cfq_cfqq_on_rr(cfqq));

	if (unlikely(cfqd->active_queue == cfqq)) {
		__cfq_slice_expired(cfqd, cfqq, 0);
		cfq_schedule_dispatch(cfqd);
	}

	cfq_put_cfqd(cfqq->cfqd);

	/*
	 * it's on the empty list and still hashed
	 */
	list_del(&cfqq->cfq_list);
	hlist_del(&cfqq->cfq_hash);
	kmem_cache_free(cfq_pool, cfqq);
}

static inline struct cfq_queue *
__cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
		    const int hashval)
{
	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
	struct hlist_node *entry, *next;

	hlist_for_each_safe(entry, next, hash_list) {
		struct cfq_queue *__cfqq = list_entry_qhash(entry);
		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->ioprio_class, __cfqq->ioprio);

		if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
			return __cfqq;
	}

	return NULL;
}

static struct cfq_queue *
cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio)
{
	return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT));
}

static void cfq_free_io_context(struct cfq_io_context *cic)
{
	struct cfq_io_context *__cic;
	struct list_head *entry, *next;

	list_for_each_safe(entry, next, &cic->list) {
		__cic = list_entry(entry, struct cfq_io_context, list);
		kmem_cache_free(cfq_ioc_pool, __cic);
	}

	kmem_cache_free(cfq_ioc_pool, cic);
}

/*
 * Called with interrupts disabled
 */
static void cfq_exit_single_io_context(struct cfq_io_context *cic)
{
	struct cfq_data *cfqd = cic->cfqq->cfqd;
	request_queue_t *q = cfqd->queue;

	WARN_ON(!irqs_disabled());

	spin_lock(q->queue_lock);

	if (unlikely(cic->cfqq == cfqd->active_queue)) {
		__cfq_slice_expired(cfqd, cic->cfqq, 0);
		cfq_schedule_dispatch(cfqd);
	}

	cfq_put_queue(cic->cfqq);
	cic->cfqq = NULL;
	spin_unlock(q->queue_lock);
}

/*
 * Another task may update the task cic list, if it is doing a queue lookup
 * on its behalf. cfq_cic_lock excludes such concurrent updates
 */
static void cfq_exit_io_context(struct cfq_io_context *cic)
{
	struct cfq_io_context *__cic;
	struct list_head *entry;
	unsigned long flags;

	local_irq_save(flags);

	/*
	 * put the reference this task is holding to the various queues
	 */
	list_for_each(entry, &cic->list) {
		__cic = list_entry(entry, struct cfq_io_context, list);
		cfq_exit_single_io_context(__cic);
	}

	cfq_exit_single_io_context(cic);
	local_irq_restore(flags);
}

static struct cfq_io_context *
cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
{
	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);

	if (cic) {
		INIT_LIST_HEAD(&cic->list);
		cic->cfqq = NULL;
		cic->key = NULL;
		cic->last_end_request = jiffies;
		cic->ttime_total = 0;
		cic->ttime_samples = 0;
		cic->ttime_mean = 0;
		cic->dtor = cfq_free_io_context;
		cic->exit = cfq_exit_io_context;
	}

	return cic;
}

static void cfq_init_prio_data(struct cfq_queue *cfqq)
{
	struct task_struct *tsk = current;
	int ioprio_class;

	if (!cfq_cfqq_prio_changed(cfqq))
		return;

	ioprio_class = IOPRIO_PRIO_CLASS(tsk->ioprio);
	switch (ioprio_class) {
	default:
		printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
	case IOPRIO_CLASS_NONE:
		/*
		 * no prio set, place us in the middle of the BE classes
		 */
		cfqq->ioprio = task_nice_ioprio(tsk);
		cfqq->ioprio_class = IOPRIO_CLASS_BE;
		break;
	case IOPRIO_CLASS_RT:
		cfqq->ioprio = task_ioprio(tsk);
		cfqq->ioprio_class = IOPRIO_CLASS_RT;
		break;
	case IOPRIO_CLASS_BE:
		cfqq->ioprio = task_ioprio(tsk);
		cfqq->ioprio_class = IOPRIO_CLASS_BE;
		break;
	case IOPRIO_CLASS_IDLE:
		cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
		cfqq->ioprio = 7;
		cfq_clear_cfqq_idle_window(cfqq);
		break;
	}

	/*
	 * keep track of original prio settings in case we have to temporarily
	 * elevate the priority of this queue
	 */
	cfqq->org_ioprio = cfqq->ioprio;
	cfqq->org_ioprio_class = cfqq->ioprio_class;

	if (cfq_cfqq_on_rr(cfqq))
		cfq_resort_rr_list(cfqq, 0);

	cfq_clear_cfqq_prio_changed(cfqq);
}

static inline void changed_ioprio(struct cfq_queue *cfqq)
{
	if (cfqq) {
		struct cfq_data *cfqd = cfqq->cfqd;

		spin_lock(cfqd->queue->queue_lock);
		cfq_mark_cfqq_prio_changed(cfqq);
		cfq_init_prio_data(cfqq);
		spin_unlock(cfqd->queue->queue_lock);
	}
}

/*
 * callback from sys_ioprio_set, irqs are disabled
 */
static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
{
	struct cfq_io_context *cic = ioc->cic;

	changed_ioprio(cic->cfqq);

	list_for_each_entry(cic, &cic->list, list)
		changed_ioprio(cic->cfqq);

	return 0;
}

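/*
 * Look up the cfq_queue for this (key, ioprio) pair, allocating it on
 * first use. If the allocation may sleep, the queue lock is dropped and
 * the lookup retried afterwards, since another task can race and insert
 * the queue in the meantime.
 */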
static struct cfq_queue *
cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
	      int gfp_mask)
{
	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
	struct cfq_queue *cfqq, *new_cfqq = NULL;

retry:
	cfqq = __cfq_find_cfq_hash(cfqd, key, ioprio, hashval);

	if (!cfqq) {
		if (new_cfqq) {
			cfqq = new_cfqq;
			new_cfqq = NULL;
		} else if (gfp_mask & __GFP_WAIT) {
			spin_unlock_irq(cfqd->queue->queue_lock);
			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
			spin_lock_irq(cfqd->queue->queue_lock);
			goto retry;
		} else {
			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
			if (!cfqq)
				goto out;
		}

		memset(cfqq, 0, sizeof(*cfqq));

		INIT_HLIST_NODE(&cfqq->cfq_hash);
		INIT_LIST_HEAD(&cfqq->cfq_list);
		RB_CLEAR_ROOT(&cfqq->sort_list);
		INIT_LIST_HEAD(&cfqq->fifo);

		cfqq->key = key;
		hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
		atomic_set(&cfqq->ref, 0);
		cfqq->cfqd = cfqd;
		atomic_inc(&cfqd->ref);
		cfqq->service_last = 0;
		/*
		 * set ->slice_left to allow preemption for a new process
		 */
		cfqq->slice_left = 2 * cfqd->cfq_slice_idle;
		cfq_mark_cfqq_idle_window(cfqq);
		cfq_mark_cfqq_prio_changed(cfqq);
		cfq_init_prio_data(cfqq);
	}

	if (new_cfqq)
		kmem_cache_free(cfq_pool, new_cfqq);

	atomic_inc(&cfqq->ref);
out:
	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
	return cfqq;
}

Jens Axboe22e2c502005-06-27 10:55:12 +02001556/*
1557 * Setup general io context and cfq io context. There can be several cfq
1558 * io contexts per general io context, if this process is doing io to more
1559 * than one device managed by cfq. Note that caller is holding a reference to
1560 * cfqq, so we don't need to worry about it disappearing
1561 */
static struct cfq_io_context *
cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
{
	struct io_context *ioc = NULL;
	struct cfq_io_context *cic;

	might_sleep_if(gfp_mask & __GFP_WAIT);

	ioc = get_io_context(gfp_mask);
	if (!ioc)
		return NULL;

	if ((cic = ioc->cic) == NULL) {
		cic = cfq_alloc_io_context(cfqd, gfp_mask);

		if (cic == NULL)
			goto err;

		/*
		 * manually increment generic io_context usage count, it
		 * cannot go away since we are already holding one ref to it
		 */
		ioc->cic = cic;
		ioc->set_ioprio = cfq_ioc_set_ioprio;
		cic->ioc = ioc;
		cic->key = cfqd;
		atomic_inc(&cfqd->ref);
	} else {
		struct cfq_io_context *__cic;

		/*
		 * the first cic on the list is actually the head itself
		 */
		if (cic->key == cfqd)
			goto out;

		/*
		 * cic exists, check if we already are there. linear search
		 * should be ok here, the list will usually not be more than
		 * 1 or a few entries long
		 */
		list_for_each_entry(__cic, &cic->list, list) {
			/*
			 * this process is already holding a reference to
			 * this queue, so no need to get one more
			 */
			if (__cic->key == cfqd) {
				cic = __cic;
				goto out;
			}
		}

		/*
		 * nope, process doesn't have a cic associated with this
		 * cfqq yet. get a new one and add to list
		 */
		__cic = cfq_alloc_io_context(cfqd, gfp_mask);
		if (__cic == NULL)
			goto err;

		__cic->ioc = ioc;
		__cic->key = cfqd;
		atomic_inc(&cfqd->ref);
		list_add(&__cic->list, &cic->list);
		cic = __cic;
	}

out:
	return cic;
err:
	put_io_context(ioc);
	return NULL;
}

static void
cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
{
	unsigned long elapsed, ttime;

	/*
	 * if this context already has stuff queued, thinktime is from
	 * last queue not last end
	 */
#if 0
	if (time_after(cic->last_end_request, cic->last_queue))
		elapsed = jiffies - cic->last_end_request;
	else
		elapsed = jiffies - cic->last_queue;
#else
	elapsed = jiffies - cic->last_end_request;
#endif

	ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle);

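	/*
	 * fixed-point decay with weight 7/8: ttime_samples counts samples
	 * scaled by 256 and converges towards 256, while ttime_total
	 * converges towards 256 * ttime. a task that consistently waits
	 * ttime jiffies between requests thus settles at ttime_mean ~= ttime
	 */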
	cic->ttime_samples = (7*cic->ttime_samples + 256) / 8;
	cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8;
	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
}

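/*
 * ttime_samples saturates towards 256, so requiring > 80 means we have
 * seen roughly three or more requests from this io context
 */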
#define sample_valid(samples)	((samples) > 80)

/*
 * Disable idle window if the process thinks too long or seeks so much that
 * it doesn't matter
 */
static void
cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		       struct cfq_io_context *cic)
{
	int enable_idle = cfq_cfqq_idle_window(cfqq);

	if (!cic->ioc->task || !cfqd->cfq_slice_idle)
		enable_idle = 0;
	else if (sample_valid(cic->ttime_samples)) {
		if (cic->ttime_mean > cfqd->cfq_slice_idle)
			enable_idle = 0;
		else
			enable_idle = 1;
	}

	if (enable_idle)
		cfq_mark_cfqq_idle_window(cfqq);
	else
		cfq_clear_cfqq_idle_window(cfqq);
}

/*
 * Check if new_cfqq should preempt the currently active queue. Return 0 if
 * not (or if we aren't sure); returning 1 will cause a preempt.
 */
static int
cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
		   struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	if (cfq_class_idle(new_cfqq))
		return 0;

	if (!cfqq)
		return 1;

	if (cfq_class_idle(cfqq))
		return 1;
	if (!cfq_cfqq_wait_request(new_cfqq))
		return 0;
	/*
	 * if it doesn't have slice left, forget it
	 */
	if (new_cfqq->slice_left < cfqd->cfq_slice_idle)
		return 0;
	if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq))
		return 1;

	return 0;
}

/*
 * cfqq preempts the active queue. if we allowed preempt with no slice left,
 * let it have half of its nominal slice.
 */
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	struct cfq_queue *__cfqq, *next;

	list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list)
		cfq_resort_rr_list(__cfqq, 1);

	if (!cfqq->slice_left)
		cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2;

	cfqq->slice_end = cfqq->slice_left + jiffies;
	__cfq_slice_expired(cfqd, cfqq, 1);
	__cfq_set_active_queue(cfqd, cfqq);
}

/*
 * should really be a ll_rw_blk.c helper
 */
static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	request_queue_t *q = cfqd->queue;

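	/*
	 * if the queue is plugged, unplugging it also runs the request_fn;
	 * otherwise poke the driver directly
	 */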
	if (!blk_queue_plugged(q))
		q->request_fn(q);
	else
		__generic_unplug_device(q);
}

/*
 * Called when a new fs request (crq) is added (to cfqq). Check if there's
 * something we should do about it
 */
static void
cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		 struct cfq_rq *crq)
{
	const int sync = cfq_crq_is_sync(crq);

	cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);

	if (sync) {
		struct cfq_io_context *cic = crq->io_context;

		cfq_update_io_thinktime(cfqd, cic);
		cfq_update_idle_window(cfqd, cfqq, cic);

		cic->last_queue = jiffies;
	}

	if (cfqq == cfqd->active_queue) {
		/*
		 * if we are waiting for a request for this queue, let it rip
		 * immediately and flag that we must not expire this queue
		 * just now
		 */
		if (cfq_cfqq_wait_request(cfqq)) {
			cfq_mark_cfqq_must_dispatch(cfqq);
			del_timer(&cfqd->idle_slice_timer);
			cfq_start_queueing(cfqd, cfqq);
		}
	} else if (cfq_should_preempt(cfqd, cfqq, crq)) {
		/*
		 * not the active queue - expire current slice if it is
		 * idle and has expired its mean thinktime, or this new queue
		 * has some old slice time left and is of higher priority
		 */
		cfq_preempt_queue(cfqd, cfqq);
		cfq_mark_cfqq_must_dispatch(cfqq);
		cfq_start_queueing(cfqd, cfqq);
	}
}

static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
{
	struct cfq_rq *crq = RQ_DATA(rq);
	struct cfq_queue *cfqq = crq->cfq_queue;

	cfq_init_prio_data(cfqq);

	cfq_add_crq_rb(crq);

	list_add_tail(&rq->queuelist, &cfqq->fifo);

	if (rq_mergeable(rq)) {
		cfq_add_crq_hash(cfqd, crq);

		if (!cfqd->queue->last_merge)
			cfqd->queue->last_merge = rq;
	}

	cfq_crq_enqueued(cfqd, cfqq, crq);
}

static void
cfq_insert_request(request_queue_t *q, struct request *rq, int where)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	switch (where) {
	case ELEVATOR_INSERT_BACK:
		while (cfq_dispatch_requests(q, INT_MAX, 1))
			;
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * If we were idling with pending requests on
		 * inactive cfqqs, force dispatching will
		 * remove the idle timer and the queue won't
		 * be kicked by __make_request() afterward.
		 * Kick it here.
		 */
		cfq_schedule_dispatch(cfqd);
		break;
	case ELEVATOR_INSERT_FRONT:
		list_add(&rq->queuelist, &q->queue_head);
		break;
	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		cfq_enqueue(cfqd, rq);
		break;
	default:
		printk("%s: bad insert point %d\n", __FUNCTION__, where);
		return;
	}
}

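/*
 * the device has work pending if the driver dispatch list is non-empty
 * or any cfq_queue is busy
 */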
static inline int cfq_pending_requests(struct cfq_data *cfqd)
{
	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
}

static int cfq_queue_empty(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	return !cfq_pending_requests(cfqd);
}

static void cfq_completed_request(request_queue_t *q, struct request *rq)
{
	struct cfq_rq *crq = RQ_DATA(rq);
	struct cfq_queue *cfqq;

	if (unlikely(!blk_fs_request(rq)))
		return;

	cfqq = crq->cfq_queue;

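	/*
	 * on_dispatch[] counts requests handed to the driver, split by
	 * sync/async; undo the increment made at dispatch time
	 */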
	if (cfq_crq_in_flight(crq)) {
		const int sync = cfq_crq_is_sync(crq);

		WARN_ON(!cfqq->on_dispatch[sync]);
		cfqq->on_dispatch[sync]--;
	}

	cfq_account_completion(cfqq, crq);
}

static struct request *
cfq_former_request(request_queue_t *q, struct request *rq)
{
	struct cfq_rq *crq = RQ_DATA(rq);
	struct rb_node *rbprev = rb_prev(&crq->rb_node);

	if (rbprev)
		return rb_entry_crq(rbprev)->request;

	return NULL;
}

static struct request *
cfq_latter_request(request_queue_t *q, struct request *rq)
{
	struct cfq_rq *crq = RQ_DATA(rq);
	struct rb_node *rbnext = rb_next(&crq->rb_node);

	if (rbnext)
		return rb_entry_crq(rbnext)->request;

	return NULL;
}

/*
 * we temporarily boost lower priority queues if they are holding fs exclusive
 * resources. they are boosted to normal prio (CLASS_BE/4)
 */
static void cfq_prio_boost(struct cfq_queue *cfqq)
{
	const int ioprio_class = cfqq->ioprio_class;
	const int ioprio = cfqq->ioprio;

	if (has_fs_excl()) {
		/*
		 * boost idle prio on transactions that would lock out other
		 * users of the filesystem
		 */
		if (cfq_class_idle(cfqq))
			cfqq->ioprio_class = IOPRIO_CLASS_BE;
		if (cfqq->ioprio > IOPRIO_NORM)
			cfqq->ioprio = IOPRIO_NORM;
	} else {
		/*
		 * check if we need to unboost the queue
		 */
		if (cfqq->ioprio_class != cfqq->org_ioprio_class)
			cfqq->ioprio_class = cfqq->org_ioprio_class;
		if (cfqq->ioprio != cfqq->org_ioprio)
			cfqq->ioprio = cfqq->org_ioprio;
	}

	/*
	 * refile between round-robin lists if we moved the priority class
	 */
	if ((ioprio_class != cfqq->ioprio_class || ioprio != cfqq->ioprio) &&
	    cfq_cfqq_on_rr(cfqq))
		cfq_resort_rr_list(cfqq, 0);
}

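/*
 * reads and synchronous writes get a queue of their own, keyed by pid;
 * all other writes from any process share the single async queue
 */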
static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
{
	if (rw == READ || process_sync(task))
		return task->pid;

	return CFQ_KEY_ASYNC;
}

static inline int
__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		struct task_struct *task, int rw)
{
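	/*
	 * a queue we are idling on (wait_request) or that must allocate is
	 * granted one guaranteed allocation per slice; everything else is
	 * left to the block layer's discretion
	 */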
#if 1
	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
	    !cfq_cfqq_must_alloc_slice(cfqq)) {
		cfq_mark_cfqq_must_alloc_slice(cfqq);
		return ELV_MQUEUE_MUST;
	}

	return ELV_MQUEUE_MAY;
#else
	if (!cfqq || task->flags & PF_MEMALLOC)
		return ELV_MQUEUE_MAY;
	if (!cfqq->allocated[rw] || cfq_cfqq_must_alloc(cfqq)) {
		if (cfq_cfqq_wait_request(cfqq))
			return ELV_MQUEUE_MUST;

		/*
		 * only allow 1 ELV_MQUEUE_MUST per slice, otherwise we
		 * can quickly flood the queue with writes from a single task
		 */
		if (rw == READ || !cfq_cfqq_must_alloc_slice(cfqq)) {
			cfq_mark_cfqq_must_alloc_slice(cfqq);
			return ELV_MQUEUE_MUST;
		}

		return ELV_MQUEUE_MAY;
	}
	if (cfq_class_idle(cfqq))
		return ELV_MQUEUE_NO;
	if (cfqq->allocated[rw] >= cfqd->max_queued) {
		struct io_context *ioc = get_io_context(GFP_ATOMIC);
		int ret = ELV_MQUEUE_NO;

		if (ioc && ioc->nr_batch_requests)
			ret = ELV_MQUEUE_MAY;

		put_io_context(ioc);
		return ret;
	}

	return ELV_MQUEUE_MAY;
#endif
}

static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct task_struct *tsk = current;
	struct cfq_queue *cfqq;

	/*
	 * don't force setup of a queue from here, as a call to may_queue
	 * does not necessarily imply that a request actually will be queued.
	 * so just lookup a possibly existing queue, or return 'may queue'
	 * if that fails
	 */
	cfqq = cfq_find_cfq_hash(cfqd, cfq_queue_pid(tsk, rw), tsk->ioprio);
	if (cfqq) {
		cfq_init_prio_data(cfqq);
		cfq_prio_boost(cfqq);

		return __cfq_may_queue(cfqd, cfqq, tsk, rw);
	}

	return ELV_MQUEUE_MAY;
}

static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct request_list *rl = &q->rq;

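	/*
	 * wake up sleepers on the request_list if this queue dropped back
	 * under its limit, or if an earlier allocation failed (rq_starved)
	 * and the process needs a kickstart
	 */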
	if (cfqq->allocated[READ] <= cfqd->max_queued || cfqd->rq_starved) {
		smp_mb();
		if (waitqueue_active(&rl->wait[READ]))
			wake_up(&rl->wait[READ]);
	}

	if (cfqq->allocated[WRITE] <= cfqd->max_queued || cfqd->rq_starved) {
		smp_mb();
		if (waitqueue_active(&rl->wait[WRITE]))
			wake_up(&rl->wait[WRITE]);
	}
}

/*
 * queue lock held here
 */
static void cfq_put_request(request_queue_t *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_rq *crq = RQ_DATA(rq);

	if (crq) {
		struct cfq_queue *cfqq = crq->cfq_queue;
		const int rw = rq_data_dir(rq);

		BUG_ON(!cfqq->allocated[rw]);
		cfqq->allocated[rw]--;

		put_io_context(crq->io_context->ioc);

		mempool_free(crq, cfqd->crq_pool);
		rq->elevator_private = NULL;

		cfq_check_waiters(q, cfqq);
		cfq_put_queue(cfqq);
	}
}

/*
 * Allocate cfq data structures associated with this request.
 */
static int
cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
		int gfp_mask)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct task_struct *tsk = current;
	struct cfq_io_context *cic;
	const int rw = rq_data_dir(rq);
	pid_t key = cfq_queue_pid(tsk, rw);
	struct cfq_queue *cfqq;
	struct cfq_rq *crq;
	unsigned long flags;

	might_sleep_if(gfp_mask & __GFP_WAIT);

	cic = cfq_get_io_context(cfqd, key, gfp_mask);

	spin_lock_irqsave(q->queue_lock, flags);

	if (!cic)
		goto queue_fail;

	if (!cic->cfqq) {
		cfqq = cfq_get_queue(cfqd, key, tsk->ioprio, gfp_mask);
		if (!cfqq)
			goto queue_fail;

		cic->cfqq = cfqq;
	} else
		cfqq = cic->cfqq;

	cfqq->allocated[rw]++;
	cfq_clear_cfqq_must_alloc(cfqq);
	cfqd->rq_starved = 0;
	atomic_inc(&cfqq->ref);
	spin_unlock_irqrestore(q->queue_lock, flags);

	crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
	if (crq) {
		RB_CLEAR(&crq->rb_node);
		crq->rb_key = 0;
		crq->request = rq;
		INIT_HLIST_NODE(&crq->hash);
		crq->cfq_queue = cfqq;
		crq->io_context = cic;
		cfq_clear_crq_in_flight(crq);
		cfq_clear_crq_in_driver(crq);
		cfq_clear_crq_requeued(crq);

		if (rw == READ || process_sync(tsk))
			cfq_mark_crq_is_sync(crq);
		else
			cfq_clear_crq_is_sync(crq);

		rq->elevator_private = crq;
		return 0;
	}

	spin_lock_irqsave(q->queue_lock, flags);
	cfqq->allocated[rw]--;
	if (!(cfqq->allocated[0] + cfqq->allocated[1]))
		cfq_mark_cfqq_must_alloc(cfqq);
	cfq_put_queue(cfqq);
queue_fail:
	if (cic)
		put_io_context(cic->ioc);
	/*
	 * mark us rq allocation starved. we need to kickstart the process
	 * ourselves if there are no pending requests that can do it for us.
	 * that would be an extremely rare OOM situation
	 */
	cfqd->rq_starved = 1;
	cfq_schedule_dispatch(cfqd);
	spin_unlock_irqrestore(q->queue_lock, flags);
	return 1;
}

static void cfq_kick_queue(void *data)
{
	request_queue_t *q = data;
	struct cfq_data *cfqd = q->elevator->elevator_data;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);

	if (cfqd->rq_starved) {
		struct request_list *rl = &q->rq;

		/*
		 * we aren't guaranteed to get a request after this, but we
		 * have to be opportunistic
		 */
		smp_mb();
		if (waitqueue_active(&rl->wait[READ]))
			wake_up(&rl->wait[READ]);
		if (waitqueue_active(&rl->wait[WRITE]))
			wake_up(&rl->wait[WRITE]);
	}

	blk_remove_plug(q);
	q->request_fn(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

/*
 * Timer running if the active_queue is currently idling inside its time slice
 */
static void cfq_idle_slice_timer(unsigned long data)
{
	struct cfq_data *cfqd = (struct cfq_data *) data;
	struct cfq_queue *cfqq;
	unsigned long flags;

	spin_lock_irqsave(cfqd->queue->queue_lock, flags);

	if ((cfqq = cfqd->active_queue) != NULL) {
		unsigned long now = jiffies;

		/*
		 * expired
		 */
		if (time_after(now, cfqq->slice_end))
			goto expire;

		/*
		 * only expire and reinvoke request handler, if there are
		 * other queues with pending requests
		 */
		if (!cfq_pending_requests(cfqd)) {
			cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
			add_timer(&cfqd->idle_slice_timer);
			goto out_cont;
		}

		/*
		 * not expired and it has a request pending, let it dispatch
		 */
		if (!RB_EMPTY(&cfqq->sort_list)) {
			cfq_mark_cfqq_must_dispatch(cfqq);
			goto out_kick;
		}
	}
expire:
	cfq_slice_expired(cfqd, 0);
out_kick:
	cfq_schedule_dispatch(cfqd);
out_cont:
	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
}

/*
 * Timer running if an idle class queue is waiting for service
 */
static void cfq_idle_class_timer(unsigned long data)
{
	struct cfq_data *cfqd = (struct cfq_data *) data;
	unsigned long flags, end;

	spin_lock_irqsave(cfqd->queue->queue_lock, flags);

	/*
	 * race with a non-idle queue, reset timer
	 */
	end = cfqd->last_end_request + CFQ_IDLE_GRACE;
	if (!time_after_eq(jiffies, end)) {
		cfqd->idle_class_timer.expires = end;
		add_timer(&cfqd->idle_class_timer);
	} else
		cfq_schedule_dispatch(cfqd);

	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
}

static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
{
	del_timer_sync(&cfqd->idle_slice_timer);
	del_timer_sync(&cfqd->idle_class_timer);
	blk_sync_queue(cfqd->queue);
}

static void cfq_put_cfqd(struct cfq_data *cfqd)
{
	request_queue_t *q = cfqd->queue;

	if (!atomic_dec_and_test(&cfqd->ref))
		return;

	cfq_shutdown_timer_wq(cfqd);

	blk_put_queue(q);

	mempool_destroy(cfqd->crq_pool);
	kfree(cfqd->crq_hash);
	kfree(cfqd->cfq_hash);
	kfree(cfqd);
}

static void cfq_exit_queue(elevator_t *e)
{
	struct cfq_data *cfqd = e->elevator_data;

	cfq_shutdown_timer_wq(cfqd);
	cfq_put_cfqd(cfqd);
}

static int cfq_init_queue(request_queue_t *q, elevator_t *e)
{
	struct cfq_data *cfqd;
	int i;

	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
	if (!cfqd)
		return -ENOMEM;

	memset(cfqd, 0, sizeof(*cfqd));

	for (i = 0; i < CFQ_PRIO_LISTS; i++)
		INIT_LIST_HEAD(&cfqd->rr_list[i]);

	INIT_LIST_HEAD(&cfqd->busy_rr);
	INIT_LIST_HEAD(&cfqd->cur_rr);
	INIT_LIST_HEAD(&cfqd->idle_rr);
	INIT_LIST_HEAD(&cfqd->empty_list);

	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
	if (!cfqd->crq_hash)
		goto out_crqhash;

	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
	if (!cfqd->cfq_hash)
		goto out_cfqhash;

	cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
	if (!cfqd->crq_pool)
		goto out_crqpool;

	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);

	e->elevator_data = cfqd;

	cfqd->queue = q;
	atomic_inc(&q->refcnt);

	cfqd->max_queued = q->nr_requests / 4;
	q->nr_batching = cfq_queued;

	init_timer(&cfqd->idle_slice_timer);
	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
	cfqd->idle_slice_timer.data = (unsigned long) cfqd;

	init_timer(&cfqd->idle_class_timer);
	cfqd->idle_class_timer.function = cfq_idle_class_timer;
	cfqd->idle_class_timer.data = (unsigned long) cfqd;

	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q);

	atomic_set(&cfqd->ref, 1);

	cfqd->cfq_queued = cfq_queued;
	cfqd->cfq_quantum = cfq_quantum;
	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
	cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
	cfqd->cfq_back_max = cfq_back_max;
	cfqd->cfq_back_penalty = cfq_back_penalty;
	cfqd->cfq_slice[0] = cfq_slice_async;
	cfqd->cfq_slice[1] = cfq_slice_sync;
	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
	cfqd->cfq_slice_idle = cfq_slice_idle;
	cfqd->cfq_max_depth = cfq_max_depth;

	return 0;
out_crqpool:
	kfree(cfqd->cfq_hash);
out_cfqhash:
	kfree(cfqd->crq_hash);
out_crqhash:
	kfree(cfqd);
	return -ENOMEM;
}

static void cfq_slab_kill(void)
{
	if (crq_pool)
		kmem_cache_destroy(crq_pool);
	if (cfq_pool)
		kmem_cache_destroy(cfq_pool);
	if (cfq_ioc_pool)
		kmem_cache_destroy(cfq_ioc_pool);
}

static int __init cfq_slab_setup(void)
{
	crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
					NULL, NULL);
	if (!crq_pool)
		goto fail;

	cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
					NULL, NULL);
	if (!cfq_pool)
		goto fail;

	cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
					sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
	if (!cfq_ioc_pool)
		goto fail;

	return 0;
fail:
	cfq_slab_kill();
	return -ENOMEM;
}

/*
 * sysfs parts below -->
 */
struct cfq_fs_entry {
	struct attribute attr;
	ssize_t (*show)(struct cfq_data *, char *);
	ssize_t (*store)(struct cfq_data *, const char *, size_t);
};

static ssize_t
cfq_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
cfq_var_store(unsigned int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtoul(p, &p, 10);
	return count;
}

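/*
 * The entries below surface each tunable as a sysfs file; on typical
 * setups they appear under /sys/block/<dev>/queue/iosched/, e.g.
 *
 *	echo 10 > /sys/block/hda/queue/iosched/slice_idle
 *
 * __CONV selects msecs<->jiffies conversion for the time-based tunables.
 */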
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct cfq_data *cfqd, char *page)		\
{									\
	unsigned int __data = __VAR;					\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return cfq_var_show(__data, (page));				\
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count)	\
{									\
	unsigned int __data;						\
	int ret = cfq_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
#undef STORE_FUNCTION

static struct cfq_fs_entry cfq_quantum_entry = {
	.attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_quantum_show,
	.store = cfq_quantum_store,
};
static struct cfq_fs_entry cfq_queued_entry = {
	.attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_queued_show,
	.store = cfq_queued_store,
};
static struct cfq_fs_entry cfq_fifo_expire_sync_entry = {
	.attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_fifo_expire_sync_show,
	.store = cfq_fifo_expire_sync_store,
};
static struct cfq_fs_entry cfq_fifo_expire_async_entry = {
	.attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_fifo_expire_async_show,
	.store = cfq_fifo_expire_async_store,
};
static struct cfq_fs_entry cfq_back_max_entry = {
	.attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_back_max_show,
	.store = cfq_back_max_store,
};
static struct cfq_fs_entry cfq_back_penalty_entry = {
	.attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_back_penalty_show,
	.store = cfq_back_penalty_store,
};
static struct cfq_fs_entry cfq_slice_sync_entry = {
	.attr = {.name = "slice_sync", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_slice_sync_show,
	.store = cfq_slice_sync_store,
};
static struct cfq_fs_entry cfq_slice_async_entry = {
	.attr = {.name = "slice_async", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_slice_async_show,
	.store = cfq_slice_async_store,
};
static struct cfq_fs_entry cfq_slice_async_rq_entry = {
	.attr = {.name = "slice_async_rq", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_slice_async_rq_show,
	.store = cfq_slice_async_rq_store,
};
static struct cfq_fs_entry cfq_slice_idle_entry = {
	.attr = {.name = "slice_idle", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_slice_idle_show,
	.store = cfq_slice_idle_store,
};
static struct cfq_fs_entry cfq_max_depth_entry = {
	.attr = {.name = "max_depth", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_max_depth_show,
	.store = cfq_max_depth_store,
};

static struct attribute *default_attrs[] = {
	&cfq_quantum_entry.attr,
	&cfq_queued_entry.attr,
	&cfq_fifo_expire_sync_entry.attr,
	&cfq_fifo_expire_async_entry.attr,
	&cfq_back_max_entry.attr,
	&cfq_back_penalty_entry.attr,
	&cfq_slice_sync_entry.attr,
	&cfq_slice_async_entry.attr,
	&cfq_slice_async_rq_entry.attr,
	&cfq_slice_idle_entry.attr,
	&cfq_max_depth_entry.attr,
	NULL,
};

#define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr)

static ssize_t
cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct cfq_fs_entry *entry = to_cfq(attr);

	if (!entry->show)
		return -EIO;

	return entry->show(e->elevator_data, page);
}

static ssize_t
cfq_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct cfq_fs_entry *entry = to_cfq(attr);

	if (!entry->store)
		return -EIO;

	return entry->store(e->elevator_data, page, length);
}

static struct sysfs_ops cfq_sysfs_ops = {
	.show	= cfq_attr_show,
	.store	= cfq_attr_store,
};

static struct kobj_type cfq_ktype = {
	.sysfs_ops	= &cfq_sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct elevator_type iosched_cfq = {
	.ops = {
		.elevator_merge_fn = cfq_merge,
		.elevator_merged_fn = cfq_merged_request,
		.elevator_merge_req_fn = cfq_merged_requests,
		.elevator_next_req_fn = cfq_next_request,
		.elevator_add_req_fn = cfq_insert_request,
		.elevator_remove_req_fn = cfq_remove_request,
		.elevator_requeue_req_fn = cfq_requeue_request,
		.elevator_deactivate_req_fn = cfq_deactivate_request,
		.elevator_queue_empty_fn = cfq_queue_empty,
		.elevator_completed_req_fn = cfq_completed_request,
		.elevator_former_req_fn = cfq_former_request,
		.elevator_latter_req_fn = cfq_latter_request,
		.elevator_set_req_fn = cfq_set_request,
		.elevator_put_req_fn = cfq_put_request,
		.elevator_may_queue_fn = cfq_may_queue,
		.elevator_init_fn = cfq_init_queue,
		.elevator_exit_fn = cfq_exit_queue,
	},
	.elevator_ktype = &cfq_ktype,
	.elevator_name = "cfq",
	.elevator_owner = THIS_MODULE,
};

static int __init cfq_init(void)
{
	int ret;

	/*
	 * could be 0 on HZ < 1000 setups
	 */
	if (!cfq_slice_async)
		cfq_slice_async = 1;
	if (!cfq_slice_idle)
		cfq_slice_idle = 1;

	if (cfq_slab_setup())
		return -ENOMEM;

	ret = elv_register(&iosched_cfq);
	if (ret)
		cfq_slab_kill();

	return ret;
}

static void __exit cfq_exit(void)
{
	struct task_struct *g, *p;
	unsigned long flags;

	read_lock_irqsave(&tasklist_lock, flags);

	/*
	 * iterate each process in the system, removing our io_context
	 */
	do_each_thread(g, p) {
		struct io_context *ioc = p->io_context;

		if (ioc && ioc->cic) {
			ioc->cic->exit(ioc->cic);
			cfq_free_io_context(ioc->cic);
			ioc->cic = NULL;
		}
	} while_each_thread(g, p);

	read_unlock_irqrestore(&tasklist_lock, flags);

	cfq_slab_kill();
	elv_unregister(&iosched_cfq);
}

module_init(cfq_init);
module_exit(cfq_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");