blob: 13794477785985f21b68daeccb328af3702e3d24 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07002 * CFQ, or complete fairness queueing, disk scheduler.
3 *
4 * Based on ideas from a previously unfinished io
5 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
6 *
Jens Axboe0fe23472006-09-04 15:41:16 +02007 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07009#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090010#include <linux/slab.h>
Al Viro1cc9be62006-03-18 12:29:52 -050011#include <linux/blkdev.h>
12#include <linux/elevator.h>
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060013#include <linux/ktime.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/rbtree.h>
Jens Axboe22e2c502005-06-27 10:55:12 +020015#include <linux/ioprio.h>
Jens Axboe7b679132008-05-30 12:23:07 +020016#include <linux/blktrace_api.h>
Tejun Heoeea8f412015-05-22 17:13:17 -040017#include <linux/blk-cgroup.h>
Tejun Heo6e736be2011-12-14 00:33:38 +010018#include "blk.h"
Jens Axboe87760e52016-11-09 12:38:14 -070019#include "blk-wbt.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
21/*
22 * tunables
23 */
Jens Axboefe094d92008-01-31 13:08:54 +010024/* max queue in one round of service */
Shaohua Liabc3c742010-03-01 09:20:54 +010025static const int cfq_quantum = 8;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060026static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
Jens Axboefe094d92008-01-31 13:08:54 +010027/* maximum backwards seek, in KiB */
28static const int cfq_back_max = 16 * 1024;
29/* penalty of a backwards seek */
30static const int cfq_back_penalty = 2;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060031static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
32static u64 cfq_slice_async = NSEC_PER_SEC / 25;
Arjan van de Ven64100092006-01-06 09:46:02 +010033static const int cfq_slice_async_rq = 2;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060034static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
35static u64 cfq_group_idle = NSEC_PER_SEC / 125;
36static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
Corrado Zoccolo5db5d642009-10-26 22:44:04 +010037static const int cfq_hist_divisor = 4;
Jens Axboe22e2c502005-06-27 10:55:12 +020038
/*
 * offset from end of service tree (1/5 of a second, in ns)
 */
#define CFQ_IDLE_DELAY		(NSEC_PER_SEC / 5)

/*
 * below this threshold (two timer ticks, in ns), we consider thinktime
 * immediate
 */
#define CFQ_MIN_TT		(2 * NSEC_PER_SEC / HZ)

#define CFQ_SLICE_SCALE		(5)
#define CFQ_HW_QUEUE_MIN	(5)
/* shift used for the fixed point vfraction kept in struct cfq_group */
#define CFQ_SERVICE_SHIFT	12
Jens Axboe22e2c502005-06-27 10:55:12 +020052
/*
 * Distance thresholds, typed as sector_t.  Each replacement list is fully
 * parenthesized so the cast cannot be torn apart by a neighbouring operator
 * (for example sizeof or a postfix operator at the expansion site).
 */
#define CFQQ_SEEK_THR		((sector_t)(8 * 100))
#define CFQQ_CLOSE_THR		((sector_t)(8 * 1024))
#define CFQQ_SECT_THR_NONROT	((sector_t)(2 * 32))
/* true when more than 32/8 (i.e. 4) bits of the 32-bit seek_history are set */
#define CFQQ_SEEKY(cfqq)	(hweight32((cfqq)->seek_history) > 32/8)

/*
 * Accessors for the scheduler-private data attached to a request.  The
 * casts are wrapped in an outer pair of parentheses so that
 * RQ_CFQQ(rq)->field applies "->" to the cast result rather than to the
 * void pointer stored in elv.priv[].
 */
#define RQ_CIC(rq)		icq_to_cic((rq)->elv.icq)
#define RQ_CFQQ(rq)		((struct cfq_queue *) ((rq)->elv.priv[0]))
#define RQ_CFQG(rq)		((struct cfq_group *) ((rq)->elv.priv[1]))
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
/* slab cache handle; presumably backs struct cfq_queue allocations - confirm at the use site */
static struct kmem_cache *cfq_pool;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
/* ioprio class predicates for a cfq_queue */
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

/* a sample count is considered usable only once it exceeds 80 */
#define sample_valid(samples)	((samples) > 80)
/* map an rb_node embedded in a cfq_group back to the group */
#define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)

/* blkio-related constants */
#define CFQ_WEIGHT_LEGACY_MIN	10
#define CFQ_WEIGHT_LEGACY_DFL	500
#define CFQ_WEIGHT_LEGACY_MAX	1000
Arianna Avanzinie48453c2015-06-05 23:38:42 +020075
Tejun Heoc5869802011-12-14 00:33:41 +010076struct cfq_ttime {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060077 u64 last_end_request;
Tejun Heoc5869802011-12-14 00:33:41 +010078
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060079 u64 ttime_total;
80 u64 ttime_mean;
Tejun Heoc5869802011-12-14 00:33:41 +010081 unsigned long ttime_samples;
Tejun Heoc5869802011-12-14 00:33:41 +010082};
83
Jens Axboe22e2c502005-06-27 10:55:12 +020084/*
Jens Axboecc09e292007-04-26 12:53:50 +020085 * Most of our rbtree usage is for sorting with min extraction, so
86 * if we cache the leftmost node we don't have to walk down the tree
87 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
88 * move this into the elevator for the rq sorting as well.
89 */
90struct cfq_rb_root {
91 struct rb_root rb;
92 struct rb_node *left;
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +010093 unsigned count;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -050094 u64 min_vdisktime;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +020095 struct cfq_ttime ttime;
Jens Axboecc09e292007-04-26 12:53:50 +020096};
Shaohua Lif5f2b6c2011-07-12 14:24:55 +020097#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060098 .ttime = {.last_end_request = ktime_get_ns(),},}
Jens Axboecc09e292007-04-26 12:53:50 +020099
100/*
Jens Axboe6118b702009-06-30 09:34:12 +0200101 * Per process-grouping structure
102 */
103struct cfq_queue {
104 /* reference count */
Shaohua Li30d7b942011-01-07 08:46:59 +0100105 int ref;
Jens Axboe6118b702009-06-30 09:34:12 +0200106 /* various state flags, see below */
107 unsigned int flags;
108 /* parent cfq_data */
109 struct cfq_data *cfqd;
110 /* service_tree member */
111 struct rb_node rb_node;
112 /* service_tree key */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600113 u64 rb_key;
Jens Axboe6118b702009-06-30 09:34:12 +0200114 /* prio tree member */
115 struct rb_node p_node;
116 /* prio tree root we belong to, if any */
117 struct rb_root *p_root;
118 /* sorted list of pending requests */
119 struct rb_root sort_list;
120 /* if fifo isn't expired, next request to serve */
121 struct request *next_rq;
122 /* requests queued in sort_list */
123 int queued[2];
124 /* currently allocated requests */
125 int allocated[2];
126 /* fifo list of requests in sort_list */
127 struct list_head fifo;
128
Vivek Goyaldae739e2009-12-03 12:59:45 -0500129 /* time when queue got scheduled in to dispatch first request. */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600130 u64 dispatch_start;
131 u64 allocated_slice;
132 u64 slice_dispatch;
Vivek Goyaldae739e2009-12-03 12:59:45 -0500133 /* time when first request from queue completed and slice started. */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600134 u64 slice_start;
135 u64 slice_end;
Jan Kara93fdf142016-06-28 09:04:00 +0200136 s64 slice_resid;
Jens Axboe6118b702009-06-30 09:34:12 +0200137
Christoph Hellwig65299a32011-08-23 14:50:29 +0200138 /* pending priority requests */
139 int prio_pending;
Jens Axboe6118b702009-06-30 09:34:12 +0200140 /* number of requests that are on the dispatch list or inside driver */
141 int dispatched;
142
143 /* io prio of this group */
144 unsigned short ioprio, org_ioprio;
Jens Axboeb8269db2016-06-09 15:47:29 -0600145 unsigned short ioprio_class, org_ioprio_class;
Jens Axboe6118b702009-06-30 09:34:12 +0200146
Richard Kennedyc4081ba2010-02-22 13:49:24 +0100147 pid_t pid;
148
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +0100149 u32 seek_history;
Jeff Moyerb2c18e12009-10-23 17:14:49 -0400150 sector_t last_request_pos;
151
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +0100152 struct cfq_rb_root *service_tree;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -0400153 struct cfq_queue *new_cfqq;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500154 struct cfq_group *cfqg;
Vivek Goyalc4e78932010-08-23 12:25:03 +0200155 /* Number of sectors dispatched from queue in single dispatch round */
156 unsigned long nr_sectors;
Jens Axboe6118b702009-06-30 09:34:12 +0200157};
158
/*
 * First index in the service_trees.
 * IDLE is handled separately: it has its own tree (service_tree_idle)
 * and its value is never used to index service_trees[][].
 */
enum wl_class_t {
	BE_WORKLOAD	= 0,
	RT_WORKLOAD	= 1,
	IDLE_WORKLOAD	= 2,
	CFQ_PRIO_NR,		/* number of classes; sizes busy_queues_avg[] */
};
169
/*
 * Second index in the service_trees.
 */
enum wl_type_t {
	ASYNC_WORKLOAD		= 0,
	SYNC_NOIDLE_WORKLOAD	= 1,
	SYNC_WORKLOAD		= 2
};
178
/*
 * Per-group statistics.  The structure has no members unless
 * CONFIG_CFQ_GROUP_IOSCHED is set; the second half additionally needs
 * CONFIG_DEBUG_BLK_CGROUP.
 */
struct cfqg_stats {
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	/* number of ios merged */
	struct blkg_rwstat		merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat		service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat		wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat		queued;
	/* total disk time and nr sectors dispatched by this group */
	struct blkg_stat		time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* time not charged to this cgroup */
	struct blkg_stat		unaccounted_time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat		avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat		avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat		dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat		group_wait_time;
	/* time spent idling for this blkcg_gq */
	struct blkg_stat		idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat		empty_time;
	/* fields after this shouldn't be cleared on stat reset */
	/* sched_clock() stamps taken when the matching state began */
	uint64_t			start_group_wait_time;
	uint64_t			start_idle_time;
	uint64_t			start_empty_time;
	/* bitmask of (1 << CFQG_stats_*) */
	uint16_t			flags;
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
#endif	/* CONFIG_CFQ_GROUP_IOSCHED */
};
214
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200215/* Per-cgroup data */
216struct cfq_group_data {
217 /* must be the first member */
Tejun Heo81437642015-08-18 14:55:15 -0700218 struct blkcg_policy_data cpd;
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200219
220 unsigned int weight;
221 unsigned int leaf_weight;
222};
223
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500224/* This is per cgroup per device grouping structure */
225struct cfq_group {
Tejun Heof95a04a2012-04-16 13:57:26 -0700226 /* must be the first member */
227 struct blkg_policy_data pd;
228
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500229 /* group service_tree member */
230 struct rb_node rb_node;
231
232 /* group service_tree key */
233 u64 vdisktime;
Tejun Heoe71357e2013-01-09 08:05:10 -0800234
235 /*
Tejun Heo7918ffb2013-01-09 08:05:11 -0800236 * The number of active cfqgs and sum of their weights under this
237 * cfqg. This covers this cfqg's leaf_weight and all children's
238 * weights, but does not cover weights of further descendants.
239 *
240 * If a cfqg is on the service tree, it's active. An active cfqg
241 * also activates its parent and contributes to the children_weight
242 * of the parent.
243 */
244 int nr_active;
245 unsigned int children_weight;
246
247 /*
Tejun Heo1d3650f2013-01-09 08:05:11 -0800248 * vfraction is the fraction of vdisktime that the tasks in this
249 * cfqg are entitled to. This is determined by compounding the
250 * ratios walking up from this cfqg to the root.
251 *
252 * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all
253 * vfractions on a service tree is approximately 1. The sum may
254 * deviate a bit due to rounding errors and fluctuations caused by
255 * cfqgs entering and leaving the service tree.
256 */
257 unsigned int vfraction;
258
259 /*
Tejun Heoe71357e2013-01-09 08:05:10 -0800260 * There are two weights - (internal) weight is the weight of this
261 * cfqg against the sibling cfqgs. leaf_weight is the wight of
262 * this cfqg against the child cfqgs. For the root cfqg, both
263 * weights are kept in sync for backward compatibility.
264 */
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500265 unsigned int weight;
Justin TerAvest8184f932011-03-17 16:12:36 +0100266 unsigned int new_weight;
Tejun Heo3381cb82012-04-01 14:38:44 -0700267 unsigned int dev_weight;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500268
Tejun Heoe71357e2013-01-09 08:05:10 -0800269 unsigned int leaf_weight;
270 unsigned int new_leaf_weight;
271 unsigned int dev_leaf_weight;
272
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500273 /* number of cfqq currently on this group */
274 int nr_cfqq;
275
Jens Axboe22e2c502005-06-27 10:55:12 +0200276 /*
Kyungmin Park4495a7d2011-05-31 10:04:09 +0200277 * Per group busy queues average. Useful for workload slice calc. We
Vivek Goyalb4627322010-10-22 09:48:43 +0200278 * create the array for each prio class but at run time it is used
279 * only for RT and BE class and slot for IDLE class remains unused.
280 * This is primarily done to avoid confusion and a gcc warning.
281 */
282 unsigned int busy_queues_avg[CFQ_PRIO_NR];
283 /*
284 * rr lists of queues with requests. We maintain service trees for
285 * RT and BE classes. These trees are subdivided in subclasses
286 * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
287 * class there is no subclassification and all the cfq queues go on
288 * a single tree service_tree_idle.
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100289 * Counts are embedded in the cfq_rb_root
Jens Axboe22e2c502005-06-27 10:55:12 +0200290 */
Corrado Zoccolo718eee02009-10-26 22:45:29 +0100291 struct cfq_rb_root service_trees[2][3];
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100292 struct cfq_rb_root service_tree_idle;
Vivek Goyaldae739e2009-12-03 12:59:45 -0500293
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600294 u64 saved_wl_slice;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -0400295 enum wl_type_t saved_wl_type;
296 enum wl_class_t saved_wl_class;
Tejun Heo4eef3042012-03-05 13:15:18 -0800297
Vivek Goyal80bdf0c2010-08-23 12:24:26 +0200298 /* number of requests that are on the dispatch list or inside driver */
299 int dispatched;
Shaohua Li7700fc42011-07-12 14:24:56 +0200300 struct cfq_ttime ttime;
Tejun Heo0b399202013-01-09 08:05:13 -0800301 struct cfqg_stats stats; /* stats for this cfqg */
Tejun Heo60a83702015-08-18 14:55:05 -0700302
303 /* async queue for each priority case */
304 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
305 struct cfq_queue *async_idle_cfqq;
306
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500307};
308
Tejun Heoc5869802011-12-14 00:33:41 +0100309struct cfq_io_cq {
310 struct io_cq icq; /* must be the first member */
311 struct cfq_queue *cfqq[2];
312 struct cfq_ttime ttime;
Tejun Heo598971b2012-03-19 15:10:58 -0700313 int ioprio; /* the current ioprio */
314#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heof4da8072014-09-08 08:15:20 +0900315 uint64_t blkcg_serial_nr; /* the current blkcg serial */
Tejun Heo598971b2012-03-19 15:10:58 -0700316#endif
Tejun Heoc5869802011-12-14 00:33:41 +0100317};
318
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500319/*
320 * Per block device queue structure
321 */
322struct cfq_data {
323 struct request_queue *queue;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500324 /* Root service tree for cfq_groups */
325 struct cfq_rb_root grp_service_tree;
Tejun Heof51b8022012-03-05 13:15:05 -0800326 struct cfq_group *root_group;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500327
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100328 /*
329 * The priority currently being served
330 */
Vivek Goyal4d2ceea2012-10-03 16:56:57 -0400331 enum wl_class_t serving_wl_class;
332 enum wl_type_t serving_wl_type;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600333 u64 workload_expires;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500334 struct cfq_group *serving_group;
Jens Axboea36e71f2009-04-15 12:15:11 +0200335
336 /*
337 * Each priority tree is sorted by next_request position. These
338 * trees are used when determining if two or more queues are
339 * interleaving requests (see cfq_close_cooperator).
340 */
341 struct rb_root prio_trees[CFQ_PRIO_LISTS];
342
Jens Axboe22e2c502005-06-27 10:55:12 +0200343 unsigned int busy_queues;
Shaohua Lief8a41d2011-03-07 09:26:29 +0100344 unsigned int busy_sync_queues;
Jens Axboe22e2c502005-06-27 10:55:12 +0200345
Corrado Zoccolo53c583d2010-02-28 19:45:05 +0100346 int rq_in_driver;
347 int rq_in_flight[2];
Aaron Carroll45333d52008-08-26 15:52:36 +0200348
349 /*
350 * queue-depth detection
351 */
352 int rq_queued;
Jens Axboe25776e32006-06-01 10:12:26 +0200353 int hw_tag;
Corrado Zoccoloe459dd02009-11-26 10:02:57 +0100354 /*
355 * hw_tag can be
356 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection)
357 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
358 * 0 => no NCQ
359 */
360 int hw_tag_est_depth;
361 unsigned int hw_tag_samples;
Jens Axboe22e2c502005-06-27 10:55:12 +0200362
363 /*
Jens Axboe22e2c502005-06-27 10:55:12 +0200364 * idle window management
365 */
Jan Kara91148322016-06-08 15:11:39 +0200366 struct hrtimer idle_slice_timer;
Jens Axboe23e018a2009-10-05 08:52:35 +0200367 struct work_struct unplug_work;
Jens Axboe22e2c502005-06-27 10:55:12 +0200368
369 struct cfq_queue *active_queue;
Tejun Heoc5869802011-12-14 00:33:41 +0100370 struct cfq_io_cq *active_cic;
Jens Axboe22e2c502005-06-27 10:55:12 +0200371
Jens Axboe6d048f52007-04-25 12:44:27 +0200372 sector_t last_position;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 /*
375 * tunables, see top of file
376 */
377 unsigned int cfq_quantum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378 unsigned int cfq_back_penalty;
379 unsigned int cfq_back_max;
Jens Axboe22e2c502005-06-27 10:55:12 +0200380 unsigned int cfq_slice_async_rq;
Jens Axboe963b72f2009-10-03 19:42:18 +0200381 unsigned int cfq_latency;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600382 u64 cfq_fifo_expire[2];
383 u64 cfq_slice[2];
384 u64 cfq_slice_idle;
385 u64 cfq_group_idle;
386 u64 cfq_target_latency;
Al Virod9ff4182006-03-18 13:51:22 -0500387
Jens Axboe6118b702009-06-30 09:34:12 +0200388 /*
389 * Fallback dummy cfqq for extreme OOM conditions
390 */
391 struct cfq_queue oom_cfqq;
Vivek Goyal365722b2009-10-03 15:21:27 +0200392
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600393 u64 last_delayed_sync;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394};
395
Vivek Goyal25fb5162009-12-03 12:59:46 -0500396static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
Tejun Heo60a83702015-08-18 14:55:05 -0700397static void cfq_put_queue(struct cfq_queue *cfqq);
Vivek Goyal25fb5162009-12-03 12:59:46 -0500398
Vivek Goyal34b98d02012-10-03 16:56:58 -0400399static struct cfq_rb_root *st_for(struct cfq_group *cfqg,
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400400 enum wl_class_t class,
Vivek Goyal65b32a52009-12-16 17:52:59 -0500401 enum wl_type_t type)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100402{
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500403 if (!cfqg)
404 return NULL;
405
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400406 if (class == IDLE_WORKLOAD)
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500407 return &cfqg->service_tree_idle;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100408
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400409 return &cfqg->service_trees[class][type];
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100410}
411
/* Bit numbers for cfq_queue->flags; accessors are generated by CFQ_CFQQ_FNS(). */
enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
	CFQ_CFQQ_FLAG_must_dispatch,	/* must be allowed a dispatch */
	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
	CFQ_CFQQ_FLAG_coop,		/* cfqq is shared */
	CFQ_CFQQ_FLAG_split_coop,	/* shared cfqq will be split */
	CFQ_CFQQ_FLAG_deep,		/* sync cfqq experienced large depth */
	CFQ_CFQQ_FLAG_wait_busy,	/* Waiting for next request */
};
427
428#define CFQ_CFQQ_FNS(name) \
429static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
430{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100431 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
Jens Axboe3b181522005-06-27 10:56:24 +0200432} \
433static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
434{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100435 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
Jens Axboe3b181522005-06-27 10:56:24 +0200436} \
437static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
438{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100439 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
Jens Axboe3b181522005-06-27 10:56:24 +0200440}
441
442CFQ_CFQQ_FNS(on_rr);
443CFQ_CFQQ_FNS(wait_request);
Jens Axboeb0291952009-04-07 11:38:31 +0200444CFQ_CFQQ_FNS(must_dispatch);
Jens Axboe3b181522005-06-27 10:56:24 +0200445CFQ_CFQQ_FNS(must_alloc_slice);
Jens Axboe3b181522005-06-27 10:56:24 +0200446CFQ_CFQQ_FNS(fifo_expire);
447CFQ_CFQQ_FNS(idle_window);
448CFQ_CFQQ_FNS(prio_changed);
Jens Axboe44f7c162007-01-19 11:51:58 +1100449CFQ_CFQQ_FNS(slice_new);
Vasily Tarasov91fac312007-04-25 12:29:51 +0200450CFQ_CFQQ_FNS(sync);
Jens Axboea36e71f2009-04-15 12:15:11 +0200451CFQ_CFQQ_FNS(coop);
Shaohua Liae54abe2010-02-05 13:11:45 +0100452CFQ_CFQQ_FNS(split_coop);
Corrado Zoccolo76280af2009-11-26 10:02:58 +0100453CFQ_CFQQ_FNS(deep);
Vivek Goyalf75edf22009-12-03 12:59:53 -0500454CFQ_CFQQ_FNS(wait_busy);
Jens Axboe3b181522005-06-27 10:56:24 +0200455#undef CFQ_CFQQ_FNS
456
Tejun Heo629ed0b2012-04-01 14:38:44 -0700457#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700458
/* cfqg stats flags: bit numbers for cfqg_stats->flags */
enum cfqg_stats_flags {
	CFQG_stats_waiting	= 0,
	CFQG_stats_idling,
	CFQG_stats_empty,
};
465
Tejun Heo155fead2012-04-01 14:38:44 -0700466#define CFQG_FLAG_FNS(name) \
467static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700468{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700469 stats->flags |= (1 << CFQG_stats_##name); \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700470} \
Tejun Heo155fead2012-04-01 14:38:44 -0700471static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700472{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700473 stats->flags &= ~(1 << CFQG_stats_##name); \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700474} \
Tejun Heo155fead2012-04-01 14:38:44 -0700475static inline int cfqg_stats_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700476{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700477 return (stats->flags & (1 << CFQG_stats_##name)) != 0; \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700478} \
479
Tejun Heo155fead2012-04-01 14:38:44 -0700480CFQG_FLAG_FNS(waiting)
481CFQG_FLAG_FNS(idling)
482CFQG_FLAG_FNS(empty)
483#undef CFQG_FLAG_FNS
Tejun Heo629ed0b2012-04-01 14:38:44 -0700484
485/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700486static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700487{
488 unsigned long long now;
489
Tejun Heo155fead2012-04-01 14:38:44 -0700490 if (!cfqg_stats_waiting(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700491 return;
492
493 now = sched_clock();
494 if (time_after64(now, stats->start_group_wait_time))
495 blkg_stat_add(&stats->group_wait_time,
496 now - stats->start_group_wait_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700497 cfqg_stats_clear_waiting(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700498}
499
500/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700501static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
502 struct cfq_group *curr_cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700503{
Tejun Heo155fead2012-04-01 14:38:44 -0700504 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700505
Tejun Heo155fead2012-04-01 14:38:44 -0700506 if (cfqg_stats_waiting(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700507 return;
Tejun Heo155fead2012-04-01 14:38:44 -0700508 if (cfqg == curr_cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700509 return;
Tejun Heo155fead2012-04-01 14:38:44 -0700510 stats->start_group_wait_time = sched_clock();
511 cfqg_stats_mark_waiting(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700512}
513
514/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700515static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700516{
517 unsigned long long now;
518
Tejun Heo155fead2012-04-01 14:38:44 -0700519 if (!cfqg_stats_empty(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700520 return;
521
522 now = sched_clock();
523 if (time_after64(now, stats->start_empty_time))
524 blkg_stat_add(&stats->empty_time,
525 now - stats->start_empty_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700526 cfqg_stats_clear_empty(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700527}
528
Tejun Heo155fead2012-04-01 14:38:44 -0700529static void cfqg_stats_update_dequeue(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700530{
Tejun Heo155fead2012-04-01 14:38:44 -0700531 blkg_stat_add(&cfqg->stats.dequeue, 1);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700532}
533
Tejun Heo155fead2012-04-01 14:38:44 -0700534static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700535{
Tejun Heo155fead2012-04-01 14:38:44 -0700536 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700537
Tejun Heo4d5e80a2013-01-09 08:05:12 -0800538 if (blkg_rwstat_total(&stats->queued))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700539 return;
540
541 /*
542 * group is already marked empty. This can happen if cfqq got new
543 * request in parent group and moved to this group while being added
544 * to service tree. Just ignore the event and move on.
545 */
Tejun Heo155fead2012-04-01 14:38:44 -0700546 if (cfqg_stats_empty(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700547 return;
548
549 stats->start_empty_time = sched_clock();
Tejun Heo155fead2012-04-01 14:38:44 -0700550 cfqg_stats_mark_empty(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700551}
552
Tejun Heo155fead2012-04-01 14:38:44 -0700553static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700554{
Tejun Heo155fead2012-04-01 14:38:44 -0700555 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700556
Tejun Heo155fead2012-04-01 14:38:44 -0700557 if (cfqg_stats_idling(stats)) {
Tejun Heo629ed0b2012-04-01 14:38:44 -0700558 unsigned long long now = sched_clock();
559
560 if (time_after64(now, stats->start_idle_time))
561 blkg_stat_add(&stats->idle_time,
562 now - stats->start_idle_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700563 cfqg_stats_clear_idling(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700564 }
565}
566
Tejun Heo155fead2012-04-01 14:38:44 -0700567static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700568{
Tejun Heo155fead2012-04-01 14:38:44 -0700569 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700570
Tejun Heo155fead2012-04-01 14:38:44 -0700571 BUG_ON(cfqg_stats_idling(stats));
Tejun Heo629ed0b2012-04-01 14:38:44 -0700572
573 stats->start_idle_time = sched_clock();
Tejun Heo155fead2012-04-01 14:38:44 -0700574 cfqg_stats_mark_idling(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700575}
576
Tejun Heo155fead2012-04-01 14:38:44 -0700577static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700578{
Tejun Heo155fead2012-04-01 14:38:44 -0700579 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700580
581 blkg_stat_add(&stats->avg_queue_size_sum,
Tejun Heo4d5e80a2013-01-09 08:05:12 -0800582 blkg_rwstat_total(&stats->queued));
Tejun Heo629ed0b2012-04-01 14:38:44 -0700583 blkg_stat_add(&stats->avg_queue_size_samples, 1);
Tejun Heo155fead2012-04-01 14:38:44 -0700584 cfqg_stats_update_group_wait_time(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700585}
586
587#else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
588
/*
 * No-op stand-ins for the debug statistics helpers, used when
 * CONFIG_CFQ_GROUP_IOSCHED and CONFIG_DEBUG_BLK_CGROUP are not both set.
 */
static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
							struct cfq_group *curr_cfqg)
{
}

static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
{
}

static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg)
{
}

static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
{
}

static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
{
}

static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
{
}

static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg)
{
}
Tejun Heo629ed0b2012-04-01 14:38:44 -0700596
597#endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
598
599#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo2ce4d502012-04-01 14:38:43 -0700600
Jens Axboe4ceab712015-06-19 10:13:01 -0600601static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
602{
603 return pd ? container_of(pd, struct cfq_group, pd) : NULL;
604}
605
606static struct cfq_group_data
607*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
608{
Tejun Heo81437642015-08-18 14:55:15 -0700609 return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL;
Jens Axboe4ceab712015-06-19 10:13:01 -0600610}
611
612static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
613{
614 return pd_to_blkg(&cfqg->pd);
615}
616
Tejun Heoffea73f2012-06-04 10:02:29 +0200617static struct blkcg_policy blkcg_policy_cfq;
618
619static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
620{
621 return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
622}
623
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200624static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
625{
626 return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
627}
628
Tejun Heod02f7aa2013-01-09 08:05:11 -0800629static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
Tejun Heo7918ffb2013-01-09 08:05:11 -0800630{
Tejun Heod02f7aa2013-01-09 08:05:11 -0800631 struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
Tejun Heo7918ffb2013-01-09 08:05:11 -0800632
Tejun Heod02f7aa2013-01-09 08:05:11 -0800633 return pblkg ? blkg_to_cfqg(pblkg) : NULL;
Tejun Heo7918ffb2013-01-09 08:05:11 -0800634}
635
Jan Kara3984aa52016-01-12 16:24:19 +0100636static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
637 struct cfq_group *ancestor)
638{
639 return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
640 cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
641}
642
/* Take a reference on the blkg backing @cfqg. */
static inline void cfqg_get(struct cfq_group *cfqg)
{
	struct blkcg_gq *blkg = cfqg_to_blkg(cfqg);

	blkg_get(blkg);
}
647
/* Drop a reference on the blkg backing @cfqg. */
static inline void cfqg_put(struct cfq_group *cfqg)
{
	struct blkcg_gq *blkg = cfqg_to_blkg(cfqg);

	blkg_put(blkg);
}
652
/*
 * Emit a blktrace message for @cfqq.  The message is prefixed with the
 * queue's pid, 'S' or 'A' for sync/async, 'N' for a sync-noidle queue and
 * the string blkg_path() produces for the queue's group.
 */
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...)				\
do {									\
	char __pbuf[128];						\
									\
	blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf));	\
	blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt,		\
			  (cfqq)->pid,					\
			  cfq_cfqq_sync((cfqq)) ? 'S' : 'A',		\
			  cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ?	\
			  'N' : ' ',					\
			  __pbuf, ##args);				\
} while (0)
Vivek Goyal2868ef72009-12-03 12:59:48 -0500662
/*
 * Emit a blktrace message for @cfqg, prefixed with the string blkg_path()
 * produces for the group.
 */
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)				\
do {									\
	char __pbuf[128];						\
									\
	blkg_path(cfqg_to_blkg(cfqg), __pbuf, sizeof(__pbuf));		\
	blk_add_trace_msg((cfqd)->queue, "%s " fmt, __pbuf, ##args);	\
} while (0)
Vivek Goyal2868ef72009-12-03 12:59:48 -0500669
Tejun Heo155fead2012-04-01 14:38:44 -0700670static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600671 struct cfq_group *curr_cfqg,
672 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700673{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600674 blkg_rwstat_add(&cfqg->stats.queued, op, 1);
Tejun Heo155fead2012-04-01 14:38:44 -0700675 cfqg_stats_end_empty_time(&cfqg->stats);
676 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700677}
678
Tejun Heo155fead2012-04-01 14:38:44 -0700679static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600680 uint64_t time, unsigned long unaccounted_time)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700681{
Tejun Heo155fead2012-04-01 14:38:44 -0700682 blkg_stat_add(&cfqg->stats.time, time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700683#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heo155fead2012-04-01 14:38:44 -0700684 blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700685#endif
Tejun Heo2ce4d502012-04-01 14:38:43 -0700686}
687
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600688static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
689 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700690{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600691 blkg_rwstat_add(&cfqg->stats.queued, op, -1);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700692}
693
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600694static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
695 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700696{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600697 blkg_rwstat_add(&cfqg->stats.merged, op, 1);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700698}
699
Tejun Heo155fead2012-04-01 14:38:44 -0700700static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600701 uint64_t start_time, uint64_t io_start_time,
702 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700703{
Tejun Heo155fead2012-04-01 14:38:44 -0700704 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700705 unsigned long long now = sched_clock();
Tejun Heo629ed0b2012-04-01 14:38:44 -0700706
707 if (time_after64(now, io_start_time))
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600708 blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700709 if (time_after64(io_start_time, start_time))
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600710 blkg_rwstat_add(&stats->wait_time, op,
Tejun Heo629ed0b2012-04-01 14:38:44 -0700711 io_start_time - start_time);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700712}
713
Tejun Heo689665a2013-01-09 08:05:13 -0800714/* @stats = 0 */
715static void cfqg_stats_reset(struct cfqg_stats *stats)
Tejun Heo155fead2012-04-01 14:38:44 -0700716{
Tejun Heo155fead2012-04-01 14:38:44 -0700717 /* queued stats shouldn't be cleared */
Tejun Heo155fead2012-04-01 14:38:44 -0700718 blkg_rwstat_reset(&stats->merged);
719 blkg_rwstat_reset(&stats->service_time);
720 blkg_rwstat_reset(&stats->wait_time);
721 blkg_stat_reset(&stats->time);
722#ifdef CONFIG_DEBUG_BLK_CGROUP
723 blkg_stat_reset(&stats->unaccounted_time);
724 blkg_stat_reset(&stats->avg_queue_size_sum);
725 blkg_stat_reset(&stats->avg_queue_size_samples);
726 blkg_stat_reset(&stats->dequeue);
727 blkg_stat_reset(&stats->group_wait_time);
728 blkg_stat_reset(&stats->idle_time);
729 blkg_stat_reset(&stats->empty_time);
730#endif
731}
732
Tejun Heo0b399202013-01-09 08:05:13 -0800733/* @to += @from */
Tejun Heoe6269c42015-08-18 14:55:21 -0700734static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
Tejun Heo0b399202013-01-09 08:05:13 -0800735{
736 /* queued stats shouldn't be cleared */
Tejun Heoe6269c42015-08-18 14:55:21 -0700737 blkg_rwstat_add_aux(&to->merged, &from->merged);
738 blkg_rwstat_add_aux(&to->service_time, &from->service_time);
739 blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
740 blkg_stat_add_aux(&from->time, &from->time);
Tejun Heo0b399202013-01-09 08:05:13 -0800741#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heoe6269c42015-08-18 14:55:21 -0700742 blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
743 blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
744 blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
745 blkg_stat_add_aux(&to->dequeue, &from->dequeue);
746 blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
747 blkg_stat_add_aux(&to->idle_time, &from->idle_time);
748 blkg_stat_add_aux(&to->empty_time, &from->empty_time);
Tejun Heo0b399202013-01-09 08:05:13 -0800749#endif
750}
751
752/*
Tejun Heoe6269c42015-08-18 14:55:21 -0700753 * Transfer @cfqg's stats to its parent's aux counts so that the ancestors'
Tejun Heo0b399202013-01-09 08:05:13 -0800754 * recursive stats can still account for the amount used by this cfqg after
755 * it's gone.
756 */
757static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
758{
759 struct cfq_group *parent = cfqg_parent(cfqg);
760
761 lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock);
762
763 if (unlikely(!parent))
764 return;
765
Tejun Heoe6269c42015-08-18 14:55:21 -0700766 cfqg_stats_add_aux(&parent->stats, &cfqg->stats);
Tejun Heo0b399202013-01-09 08:05:13 -0800767 cfqg_stats_reset(&cfqg->stats);
Tejun Heo0b399202013-01-09 08:05:13 -0800768}
769
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100770#else /* CONFIG_CFQ_GROUP_IOSCHED */
771
Tejun Heod02f7aa2013-01-09 08:05:11 -0800772static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
Jan Kara3984aa52016-01-12 16:24:19 +0100773static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
774 struct cfq_group *ancestor)
775{
776 return true;
777}
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100778static inline void cfqg_get(struct cfq_group *cfqg) { }
779static inline void cfqg_put(struct cfq_group *cfqg) { }
780
/*
 * Without CONFIG_CFQ_GROUP_IOSCHED the per-queue trace message carries no
 * group path, and the per-group trace message is a no-op.
 */
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...)				\
	blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt,		\
			  (cfqq)->pid,					\
			  cfq_cfqq_sync((cfqq)) ? 'S' : 'A',		\
			  cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ?	\
			  'N' : ' ',					\
			  ##args)
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)		do {} while (0)
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100787
Tejun Heo155fead2012-04-01 14:38:44 -0700788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600789 struct cfq_group *curr_cfqg, unsigned int op) { }
Tejun Heo155fead2012-04-01 14:38:44 -0700790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600791 uint64_t time, unsigned long unaccounted_time) { }
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
793 unsigned int op) { }
794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
795 unsigned int op) { }
Tejun Heo155fead2012-04-01 14:38:44 -0700796static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600797 uint64_t start_time, uint64_t io_start_time,
798 unsigned int op) { }
Tejun Heo2ce4d502012-04-01 14:38:43 -0700799
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100800#endif /* CONFIG_CFQ_GROUP_IOSCHED */
801
/* Emit a blktrace message on @cfqd's queue, prefixed with "cfq ". */
#define cfq_log(cfqd, fmt, args...)					\
	blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
804
/*
 * Traverses through cfq group service trees.
 *
 * For every class @i below IDLE_WORKLOAD, @st visits service_trees[i][j]
 * for @j from 0 to SYNC_WORKLOAD; for IDLE_WORKLOAD it visits
 * service_tree_idle once.
 */
#define for_each_cfqg_st(cfqg, i, j, st)				\
	for (i = 0; i <= IDLE_WORKLOAD; i++)				\
		for (j = 0, st = i < IDLE_WORKLOAD ?			\
			&cfqg->service_trees[i][j] :			\
			&cfqg->service_tree_idle;			\
		     (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) ||	\
		     (i == IDLE_WORKLOAD && j == 0);			\
		     j++, st = i < IDLE_WORKLOAD ?			\
			&cfqg->service_trees[i][j] : NULL)
814
Shaohua Lif5f2b6c2011-07-12 14:24:55 +0200815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
816 struct cfq_ttime *ttime, bool group_idle)
817{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600818 u64 slice;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +0200819 if (!sample_valid(ttime->ttime_samples))
820 return false;
821 if (group_idle)
822 slice = cfqd->cfq_group_idle;
823 else
824 slice = cfqd->cfq_slice_idle;
825 return ttime->ttime_mean > slice;
826}
Vivek Goyal615f0252009-12-03 12:59:39 -0500827
Vivek Goyal02b35082010-08-23 12:23:53 +0200828static inline bool iops_mode(struct cfq_data *cfqd)
829{
830 /*
831 * If we are not idling on queues and it is a NCQ drive, parallel
832 * execution of requests is on and measuring time is not possible
833 * in most of the cases until and unless we drive shallower queue
834 * depths and that becomes a performance bottleneck. In such cases
835 * switch to start providing fairness in terms of number of IOs.
836 */
837 if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
838 return true;
839 else
840 return false;
841}
842
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400843static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100844{
845 if (cfq_class_idle(cfqq))
846 return IDLE_WORKLOAD;
847 if (cfq_class_rt(cfqq))
848 return RT_WORKLOAD;
849 return BE_WORKLOAD;
850}
851
Corrado Zoccolo718eee02009-10-26 22:45:29 +0100852
853static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
854{
855 if (!cfq_cfqq_sync(cfqq))
856 return ASYNC_WORKLOAD;
857 if (!cfq_cfqq_idle_window(cfqq))
858 return SYNC_NOIDLE_WORKLOAD;
859 return SYNC_WORKLOAD;
860}
861
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400862static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class,
Vivek Goyal58ff82f2009-12-03 12:59:44 -0500863 struct cfq_data *cfqd,
864 struct cfq_group *cfqg)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100865{
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400866 if (wl_class == IDLE_WORKLOAD)
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500867 return cfqg->service_tree_idle.count;
868
Vivek Goyal34b98d02012-10-03 16:56:58 -0400869 return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count +
870 cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count +
871 cfqg->service_trees[wl_class][SYNC_WORKLOAD].count;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100872}
873
Vivek Goyalf26bd1f2009-12-03 12:59:54 -0500874static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
875 struct cfq_group *cfqg)
876{
Vivek Goyal34b98d02012-10-03 16:56:58 -0400877 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count +
878 cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
Vivek Goyalf26bd1f2009-12-03 12:59:54 -0500879}
880
Jens Axboe165125e2007-07-24 09:28:11 +0200881static void cfq_dispatch_insert(struct request_queue *, struct request *);
Tejun Heo4f85cb92012-03-05 13:15:28 -0800882static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
Tejun Heo2da8de02015-08-18 14:55:02 -0700883 struct cfq_io_cq *cic, struct bio *bio);
Vasily Tarasov91fac312007-04-25 12:29:51 +0200884
Tejun Heoc5869802011-12-14 00:33:41 +0100885static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
886{
887 /* cic->icq is the first member, %NULL will convert to %NULL */
888 return container_of(icq, struct cfq_io_cq, icq);
889}
890
Tejun Heo47fdd4c2011-12-14 00:33:42 +0100891static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd,
892 struct io_context *ioc)
893{
894 if (ioc)
895 return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue));
896 return NULL;
897}
898
Tejun Heoc5869802011-12-14 00:33:41 +0100899static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
Vasily Tarasov91fac312007-04-25 12:29:51 +0200900{
Jens Axboea6151c32009-10-07 20:02:57 +0200901 return cic->cfqq[is_sync];
Vasily Tarasov91fac312007-04-25 12:29:51 +0200902}
903
Tejun Heoc5869802011-12-14 00:33:41 +0100904static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
905 bool is_sync)
Vasily Tarasov91fac312007-04-25 12:29:51 +0200906{
Jens Axboea6151c32009-10-07 20:02:57 +0200907 cic->cfqq[is_sync] = cfqq;
Vasily Tarasov91fac312007-04-25 12:29:51 +0200908}
909
Tejun Heoc5869802011-12-14 00:33:41 +0100910static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +0400911{
Tejun Heoc5869802011-12-14 00:33:41 +0100912 return cic->icq.q->elevator->elevator_data;
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +0400913}
914
Vasily Tarasov91fac312007-04-25 12:29:51 +0200915/*
Andrew Morton99f95e52005-06-27 20:14:05 -0700916 * scheduler run of queue, if there are requests pending and no one in the
917 * driver that will restart queueing
918 */
Jens Axboe23e018a2009-10-05 08:52:35 +0200919static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
Andrew Morton99f95e52005-06-27 20:14:05 -0700920{
Jens Axboe7b679132008-05-30 12:23:07 +0200921 if (cfqd->busy_queues) {
922 cfq_log(cfqd, "schedule dispatch");
Jens Axboe59c3d452014-04-08 09:15:35 -0600923 kblockd_schedule_work(&cfqd->unplug_work);
Jens Axboe7b679132008-05-30 12:23:07 +0200924 }
Andrew Morton99f95e52005-06-27 20:14:05 -0700925}
926
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927/*
Jens Axboe44f7c162007-01-19 11:51:58 +1100928 * Scale schedule slice based on io priority. Use the sync time slice only
929 * if a queue is marked sync and has sync io queued. A sync queue with async
930 * io only, should not get full sync slice length.
931 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600932static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
Jens Axboed9e76202007-04-20 14:27:50 +0200933 unsigned short prio)
934{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600935 u64 base_slice = cfqd->cfq_slice[sync];
936 u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
Jens Axboed9e76202007-04-20 14:27:50 +0200937
938 WARN_ON(prio >= IOPRIO_BE_NR);
939
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600940 return base_slice + (slice * (4 - prio));
Jens Axboed9e76202007-04-20 14:27:50 +0200941}
942
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600943static inline u64
Jens Axboe44f7c162007-01-19 11:51:58 +1100944cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
945{
Jens Axboed9e76202007-04-20 14:27:50 +0200946 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
Jens Axboe44f7c162007-01-19 11:51:58 +1100947}
948
Tejun Heo1d3650f2013-01-09 08:05:11 -0800949/**
950 * cfqg_scale_charge - scale disk time charge according to cfqg weight
951 * @charge: disk time being charged
952 * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT
953 *
954 * Scale @charge according to @vfraction, which is in range (0, 1]. The
955 * scaling is inversely proportional.
956 *
957 * scaled = charge / vfraction
958 *
959 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
960 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600961static inline u64 cfqg_scale_charge(u64 charge,
Tejun Heo1d3650f2013-01-09 08:05:11 -0800962 unsigned int vfraction)
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500963{
Tejun Heo1d3650f2013-01-09 08:05:11 -0800964 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500965
Tejun Heo1d3650f2013-01-09 08:05:11 -0800966 /* charge / vfraction */
967 c <<= CFQ_SERVICE_SHIFT;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600968 return div_u64(c, vfraction);
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500969}
970
971static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
972{
973 s64 delta = (s64)(vdisktime - min_vdisktime);
974 if (delta > 0)
975 min_vdisktime = vdisktime;
976
977 return min_vdisktime;
978}
979
980static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
981{
982 s64 delta = (s64)(vdisktime - min_vdisktime);
983 if (delta < 0)
984 min_vdisktime = vdisktime;
985
986 return min_vdisktime;
987}
988
989static void update_min_vdisktime(struct cfq_rb_root *st)
990{
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500991 struct cfq_group *cfqg;
992
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500993 if (st->left) {
994 cfqg = rb_entry_cfqg(st->left);
Gui Jianfenga6032712011-03-07 09:28:09 +0100995 st->min_vdisktime = max_vdisktime(st->min_vdisktime,
996 cfqg->vdisktime);
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500997 }
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500998}
999
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001000/*
1001 * get averaged number of queues of RT/BE priority.
1002 * average is updated, with a formula that gives more weight to higher numbers,
1003 * to quickly follows sudden increases and decrease slowly
1004 */
1005
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001006static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
1007 struct cfq_group *cfqg, bool rt)
Jens Axboe5869619c2009-10-28 09:27:07 +01001008{
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001009 unsigned min_q, max_q;
1010 unsigned mult = cfq_hist_divisor - 1;
1011 unsigned round = cfq_hist_divisor / 2;
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001012 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001013
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001014 min_q = min(cfqg->busy_queues_avg[rt], busy);
1015 max_q = max(cfqg->busy_queues_avg[rt], busy);
1016 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001017 cfq_hist_divisor;
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001018 return cfqg->busy_queues_avg[rt];
1019}
1020
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001021static inline u64
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001022cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
1023{
Tejun Heo41cad6a2013-01-09 08:05:11 -08001024 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001025}
1026
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001027static inline u64
Vivek Goyalba5bd522011-01-19 08:25:02 -07001028cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Jens Axboe44f7c162007-01-19 11:51:58 +11001029{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001030 u64 slice = cfq_prio_to_slice(cfqd, cfqq);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001031 if (cfqd->cfq_latency) {
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001032 /*
1033 * interested queues (we consider only the ones with the same
1034 * priority class in the cfq group)
1035 */
1036 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
1037 cfq_class_rt(cfqq));
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001038 u64 sync_slice = cfqd->cfq_slice[1];
1039 u64 expect_latency = sync_slice * iq;
1040 u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001041
1042 if (expect_latency > group_slice) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001043 u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
1044 u64 low_slice;
1045
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001046 /* scale low_slice according to IO priority
1047 * and sync vs async */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001048 low_slice = div64_u64(base_low_slice*slice, sync_slice);
1049 low_slice = min(slice, low_slice);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001050 /* the adapted slice value is scaled to fit all iqs
1051 * into the target latency */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001052 slice = div64_u64(slice*group_slice, expect_latency);
1053 slice = max(slice, low_slice);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001054 }
1055 }
Shaohua Lic553f8e2011-01-14 08:41:03 +01001056 return slice;
1057}
1058
1059static inline void
1060cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1061{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001062 u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
1063 u64 now = ktime_get_ns();
Shaohua Lic553f8e2011-01-14 08:41:03 +01001064
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001065 cfqq->slice_start = now;
1066 cfqq->slice_end = now + slice;
Vivek Goyalf75edf22009-12-03 12:59:53 -05001067 cfqq->allocated_slice = slice;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001068 cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
Jens Axboe44f7c162007-01-19 11:51:58 +11001069}
1070
1071/*
1072 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
1073 * isn't valid until the first request from the dispatch is activated
1074 * and the slice time set.
1075 */
Jens Axboea6151c32009-10-07 20:02:57 +02001076static inline bool cfq_slice_used(struct cfq_queue *cfqq)
Jens Axboe44f7c162007-01-19 11:51:58 +11001077{
1078 if (cfq_cfqq_slice_new(cfqq))
Shaohua Lic1e44752010-11-08 15:01:02 +01001079 return false;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001080 if (ktime_get_ns() < cfqq->slice_end)
Shaohua Lic1e44752010-11-08 15:01:02 +01001081 return false;
Jens Axboe44f7c162007-01-19 11:51:58 +11001082
Shaohua Lic1e44752010-11-08 15:01:02 +01001083 return true;
Jens Axboe44f7c162007-01-19 11:51:58 +11001084}
1085
1086/*
Jens Axboe5e705372006-07-13 12:39:25 +02001087 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 * We choose the request that is closest to the head right now. Distance
Andreas Mohre8a99052006-03-28 08:59:49 +02001089 * behind the head is penalized and only allowed to a certain extent.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 */
Jens Axboe5e705372006-07-13 12:39:25 +02001091static struct request *
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001092cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001094 sector_t s1, s2, d1 = 0, d2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 unsigned long back_max;
Andreas Mohre8a99052006-03-28 08:59:49 +02001096#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
1097#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
1098 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Jens Axboe5e705372006-07-13 12:39:25 +02001100 if (rq1 == NULL || rq1 == rq2)
1101 return rq2;
1102 if (rq2 == NULL)
1103 return rq1;
Jens Axboe9c2c38a2005-08-24 14:57:54 +02001104
Namhyung Kim229836b2011-05-24 10:23:21 +02001105 if (rq_is_sync(rq1) != rq_is_sync(rq2))
1106 return rq_is_sync(rq1) ? rq1 : rq2;
1107
Christoph Hellwig65299a32011-08-23 14:50:29 +02001108 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
1109 return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
Jens Axboeb53d1ed2011-08-19 08:34:48 +02001110
Tejun Heo83096eb2009-05-07 22:24:39 +09001111 s1 = blk_rq_pos(rq1);
1112 s2 = blk_rq_pos(rq2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 /*
1115 * by definition, 1KiB is 2 sectors
1116 */
1117 back_max = cfqd->cfq_back_max * 2;
1118
1119 /*
1120 * Strict one way elevator _except_ in the case where we allow
1121 * short backward seeks which are biased as twice the cost of a
1122 * similar forward seek.
1123 */
1124 if (s1 >= last)
1125 d1 = s1 - last;
1126 else if (s1 + back_max >= last)
1127 d1 = (last - s1) * cfqd->cfq_back_penalty;
1128 else
Andreas Mohre8a99052006-03-28 08:59:49 +02001129 wrap |= CFQ_RQ1_WRAP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 if (s2 >= last)
1132 d2 = s2 - last;
1133 else if (s2 + back_max >= last)
1134 d2 = (last - s2) * cfqd->cfq_back_penalty;
1135 else
Andreas Mohre8a99052006-03-28 08:59:49 +02001136 wrap |= CFQ_RQ2_WRAP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137
1138 /* Found required data */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
Andreas Mohre8a99052006-03-28 08:59:49 +02001140 /*
1141 * By doing switch() on the bit mask "wrap" we avoid having to
1142 * check two variables for all permutations: --> faster!
1143 */
1144 switch (wrap) {
Jens Axboe5e705372006-07-13 12:39:25 +02001145 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
Andreas Mohre8a99052006-03-28 08:59:49 +02001146 if (d1 < d2)
Jens Axboe5e705372006-07-13 12:39:25 +02001147 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001148 else if (d2 < d1)
Jens Axboe5e705372006-07-13 12:39:25 +02001149 return rq2;
Andreas Mohre8a99052006-03-28 08:59:49 +02001150 else {
1151 if (s1 >= s2)
Jens Axboe5e705372006-07-13 12:39:25 +02001152 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001153 else
Jens Axboe5e705372006-07-13 12:39:25 +02001154 return rq2;
Andreas Mohre8a99052006-03-28 08:59:49 +02001155 }
1156
1157 case CFQ_RQ2_WRAP:
Jens Axboe5e705372006-07-13 12:39:25 +02001158 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001159 case CFQ_RQ1_WRAP:
Jens Axboe5e705372006-07-13 12:39:25 +02001160 return rq2;
1161 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
Andreas Mohre8a99052006-03-28 08:59:49 +02001162 default:
1163 /*
1164 * Since both rqs are wrapped,
1165 * start with the one that's further behind head
1166 * (--> only *one* back seek required),
1167 * since back seek takes more time than forward.
1168 */
1169 if (s1 <= s2)
Jens Axboe5e705372006-07-13 12:39:25 +02001170 return rq1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 else
Jens Axboe5e705372006-07-13 12:39:25 +02001172 return rq2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 }
1174}
1175
Jens Axboe498d3aa22007-04-26 12:54:48 +02001176/*
1177 * The below is leftmost cache rbtree addon
1178 */
Jens Axboe08717142008-01-28 11:38:15 +01001179static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
Jens Axboecc09e292007-04-26 12:53:50 +02001180{
Vivek Goyal615f0252009-12-03 12:59:39 -05001181 /* Service tree is empty */
1182 if (!root->count)
1183 return NULL;
1184
Jens Axboecc09e292007-04-26 12:53:50 +02001185 if (!root->left)
1186 root->left = rb_first(&root->rb);
1187
Jens Axboe08717142008-01-28 11:38:15 +01001188 if (root->left)
1189 return rb_entry(root->left, struct cfq_queue, rb_node);
1190
1191 return NULL;
Jens Axboecc09e292007-04-26 12:53:50 +02001192}
1193
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001194static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
1195{
1196 if (!root->left)
1197 root->left = rb_first(&root->rb);
1198
1199 if (root->left)
1200 return rb_entry_cfqg(root->left);
1201
1202 return NULL;
1203}
1204
/* Erase @n from @root and mark the node as cleared. */
static void rb_erase_init(struct rb_node *n, struct rb_root *root)
{
	rb_erase(n, root);
	RB_CLEAR_NODE(n);
}
1210
Jens Axboecc09e292007-04-26 12:53:50 +02001211static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
1212{
1213 if (root->left == n)
1214 root->left = NULL;
Jens Axboea36e71f2009-04-15 12:15:11 +02001215 rb_erase_init(n, &root->rb);
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +01001216 --root->count;
Jens Axboecc09e292007-04-26 12:53:50 +02001217}
1218
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219/*
1220 * would be nice to take fifo expire time into account as well
1221 */
Jens Axboe5e705372006-07-13 12:39:25 +02001222static struct request *
1223cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1224 struct request *last)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225{
Jens Axboe21183b02006-07-13 12:33:14 +02001226 struct rb_node *rbnext = rb_next(&last->rb_node);
1227 struct rb_node *rbprev = rb_prev(&last->rb_node);
Jens Axboe5e705372006-07-13 12:39:25 +02001228 struct request *next = NULL, *prev = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229
Jens Axboe21183b02006-07-13 12:33:14 +02001230 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
1232 if (rbprev)
Jens Axboe5e705372006-07-13 12:39:25 +02001233 prev = rb_entry_rq(rbprev);
Jens Axboe21183b02006-07-13 12:33:14 +02001234
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 if (rbnext)
Jens Axboe5e705372006-07-13 12:39:25 +02001236 next = rb_entry_rq(rbnext);
Jens Axboe21183b02006-07-13 12:33:14 +02001237 else {
1238 rbnext = rb_first(&cfqq->sort_list);
1239 if (rbnext && rbnext != &last->rb_node)
Jens Axboe5e705372006-07-13 12:39:25 +02001240 next = rb_entry_rq(rbnext);
Jens Axboe21183b02006-07-13 12:33:14 +02001241 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001243 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244}
1245
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001246static u64 cfq_slice_offset(struct cfq_data *cfqd,
1247 struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248{
Jens Axboed9e76202007-04-20 14:27:50 +02001249 /*
1250 * just an approximation, should be ok.
1251 */
Vivek Goyalcdb16e82009-12-03 12:59:38 -05001252 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
Jens Axboe464191c2009-11-30 09:38:13 +01001253 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
Jens Axboed9e76202007-04-20 14:27:50 +02001254}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001256static inline s64
1257cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
1258{
1259 return cfqg->vdisktime - st->min_vdisktime;
1260}
1261
1262static void
1263__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
1264{
1265 struct rb_node **node = &st->rb.rb_node;
1266 struct rb_node *parent = NULL;
1267 struct cfq_group *__cfqg;
1268 s64 key = cfqg_key(st, cfqg);
1269 int left = 1;
1270
1271 while (*node != NULL) {
1272 parent = *node;
1273 __cfqg = rb_entry_cfqg(parent);
1274
1275 if (key < cfqg_key(st, __cfqg))
1276 node = &parent->rb_left;
1277 else {
1278 node = &parent->rb_right;
1279 left = 0;
1280 }
1281 }
1282
1283 if (left)
1284 st->left = &cfqg->rb_node;
1285
1286 rb_link_node(&cfqg->rb_node, parent, node);
1287 rb_insert_color(&cfqg->rb_node, &st->rb);
1288}
1289
Toshiaki Makita7b5af5c2014-08-28 17:14:58 +09001290/*
1291 * This has to be called only on activation of cfqg
1292 */
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001293static void
Justin TerAvest8184f932011-03-17 16:12:36 +01001294cfq_update_group_weight(struct cfq_group *cfqg)
1295{
Tejun Heo3381cb82012-04-01 14:38:44 -07001296 if (cfqg->new_weight) {
Justin TerAvest8184f932011-03-17 16:12:36 +01001297 cfqg->weight = cfqg->new_weight;
Tejun Heo3381cb82012-04-01 14:38:44 -07001298 cfqg->new_weight = 0;
Justin TerAvest8184f932011-03-17 16:12:36 +01001299 }
Toshiaki Makitae15693e2014-08-26 20:56:36 +09001300}
1301
1302static void
1303cfq_update_group_leaf_weight(struct cfq_group *cfqg)
1304{
1305 BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
Tejun Heoe71357e2013-01-09 08:05:10 -08001306
1307 if (cfqg->new_leaf_weight) {
1308 cfqg->leaf_weight = cfqg->new_leaf_weight;
1309 cfqg->new_leaf_weight = 0;
1310 }
Justin TerAvest8184f932011-03-17 16:12:36 +01001311}
1312
1313static void
1314cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
1315{
Tejun Heo1d3650f2013-01-09 08:05:11 -08001316 unsigned int vfr = 1 << CFQ_SERVICE_SHIFT; /* start with 1 */
Tejun Heo7918ffb2013-01-09 08:05:11 -08001317 struct cfq_group *pos = cfqg;
Tejun Heo1d3650f2013-01-09 08:05:11 -08001318 struct cfq_group *parent;
Tejun Heo7918ffb2013-01-09 08:05:11 -08001319 bool propagate;
1320
1321 /* add to the service tree */
Justin TerAvest8184f932011-03-17 16:12:36 +01001322 BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
1323
Toshiaki Makita7b5af5c2014-08-28 17:14:58 +09001324 /*
1325 * Update leaf_weight. We cannot update weight at this point
1326 * because cfqg might already have been activated and is
1327 * contributing its current weight to the parent's child_weight.
1328 */
Toshiaki Makitae15693e2014-08-26 20:56:36 +09001329 cfq_update_group_leaf_weight(cfqg);
Justin TerAvest8184f932011-03-17 16:12:36 +01001330 __cfq_group_service_tree_add(st, cfqg);
Tejun Heo7918ffb2013-01-09 08:05:11 -08001331
1332 /*
Tejun Heo1d3650f2013-01-09 08:05:11 -08001333 * Activate @cfqg and calculate the portion of vfraction @cfqg is
1334 * entitled to. vfraction is calculated by walking the tree
1335 * towards the root calculating the fraction it has at each level.
1336 * The compounded ratio is how much vfraction @cfqg owns.
1337 *
1338 * Start with the proportion tasks in this cfqg has against active
1339 * children cfqgs - its leaf_weight against children_weight.
Tejun Heo7918ffb2013-01-09 08:05:11 -08001340 */
1341 propagate = !pos->nr_active++;
1342 pos->children_weight += pos->leaf_weight;
Tejun Heo1d3650f2013-01-09 08:05:11 -08001343 vfr = vfr * pos->leaf_weight / pos->children_weight;
Tejun Heo7918ffb2013-01-09 08:05:11 -08001344
Tejun Heo1d3650f2013-01-09 08:05:11 -08001345 /*
1346 * Compound ->weight walking up the tree. Both activation and
1347 * vfraction calculation are done in the same loop. Propagation
1348 * stops once an already activated node is met. vfraction
1349 * calculation should always continue to the root.
1350 */
Tejun Heod02f7aa2013-01-09 08:05:11 -08001351 while ((parent = cfqg_parent(pos))) {
Tejun Heo1d3650f2013-01-09 08:05:11 -08001352 if (propagate) {
Toshiaki Makitae15693e2014-08-26 20:56:36 +09001353 cfq_update_group_weight(pos);
Tejun Heo1d3650f2013-01-09 08:05:11 -08001354 propagate = !parent->nr_active++;
1355 parent->children_weight += pos->weight;
1356 }
1357 vfr = vfr * pos->weight / parent->children_weight;
Tejun Heo7918ffb2013-01-09 08:05:11 -08001358 pos = parent;
1359 }
Tejun Heo1d3650f2013-01-09 08:05:11 -08001360
1361 cfqg->vfraction = max_t(unsigned, vfr, 1);
Justin TerAvest8184f932011-03-17 16:12:36 +01001362}
1363
1364static void
1365cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001366{
1367 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1368 struct cfq_group *__cfqg;
1369 struct rb_node *n;
1370
1371 cfqg->nr_cfqq++;
Gui Jianfeng760701b2010-11-30 20:52:47 +01001372 if (!RB_EMPTY_NODE(&cfqg->rb_node))
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001373 return;
1374
1375 /*
1376 * Currently put the group at the end. Later implement something
1377 * so that groups get lesser vtime based on their weights, so that
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001378 * if group does not loose all if it was not continuously backlogged.
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001379 */
1380 n = rb_last(&st->rb);
1381 if (n) {
1382 __cfqg = rb_entry_cfqg(n);
1383 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
1384 } else
1385 cfqg->vdisktime = st->min_vdisktime;
Justin TerAvest8184f932011-03-17 16:12:36 +01001386 cfq_group_service_tree_add(st, cfqg);
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001387}
1388
1389static void
Justin TerAvest8184f932011-03-17 16:12:36 +01001390cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
1391{
Tejun Heo7918ffb2013-01-09 08:05:11 -08001392 struct cfq_group *pos = cfqg;
1393 bool propagate;
1394
1395 /*
1396 * Undo activation from cfq_group_service_tree_add(). Deactivate
1397 * @cfqg and propagate deactivation upwards.
1398 */
1399 propagate = !--pos->nr_active;
1400 pos->children_weight -= pos->leaf_weight;
1401
1402 while (propagate) {
Tejun Heod02f7aa2013-01-09 08:05:11 -08001403 struct cfq_group *parent = cfqg_parent(pos);
Tejun Heo7918ffb2013-01-09 08:05:11 -08001404
1405 /* @pos has 0 nr_active at this point */
1406 WARN_ON_ONCE(pos->children_weight);
Tejun Heo1d3650f2013-01-09 08:05:11 -08001407 pos->vfraction = 0;
Tejun Heo7918ffb2013-01-09 08:05:11 -08001408
1409 if (!parent)
1410 break;
1411
1412 propagate = !--parent->nr_active;
1413 parent->children_weight -= pos->weight;
1414 pos = parent;
1415 }
1416
1417 /* remove from the service tree */
Justin TerAvest8184f932011-03-17 16:12:36 +01001418 if (!RB_EMPTY_NODE(&cfqg->rb_node))
1419 cfq_rb_erase(&cfqg->rb_node, st);
1420}
1421
1422static void
1423cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001424{
1425 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1426
1427 BUG_ON(cfqg->nr_cfqq < 1);
1428 cfqg->nr_cfqq--;
Vivek Goyal25bc6b02009-12-03 12:59:43 -05001429
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001430 /* If there are other cfq queues under this group, don't delete it */
1431 if (cfqg->nr_cfqq)
1432 return;
1433
Vivek Goyal2868ef72009-12-03 12:59:48 -05001434 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
Justin TerAvest8184f932011-03-17 16:12:36 +01001435 cfq_group_service_tree_del(st, cfqg);
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04001436 cfqg->saved_wl_slice = 0;
Tejun Heo155fead2012-04-01 14:38:44 -07001437 cfqg_stats_update_dequeue(cfqg);
Vivek Goyaldae739e2009-12-03 12:59:45 -05001438}
1439
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001440static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
1441 u64 *unaccounted_time)
Vivek Goyaldae739e2009-12-03 12:59:45 -05001442{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001443 u64 slice_used;
1444 u64 now = ktime_get_ns();
Vivek Goyaldae739e2009-12-03 12:59:45 -05001445
1446 /*
1447 * Queue got expired before even a single request completed or
1448 * got expired immediately after first request completion.
1449 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001450 if (!cfqq->slice_start || cfqq->slice_start == now) {
Vivek Goyaldae739e2009-12-03 12:59:45 -05001451 /*
1452 * Also charge the seek time incurred to the group, otherwise
1453 * if there are mutiple queues in the group, each can dispatch
1454 * a single request on seeky media and cause lots of seek time
1455 * and group will never know it.
1456 */
Jan Kara0b31c102016-06-28 09:04:02 +02001457 slice_used = max_t(u64, (now - cfqq->dispatch_start),
1458 jiffies_to_nsecs(1));
Vivek Goyaldae739e2009-12-03 12:59:45 -05001459 } else {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001460 slice_used = now - cfqq->slice_start;
Justin TerAvest167400d2011-03-12 16:54:00 +01001461 if (slice_used > cfqq->allocated_slice) {
1462 *unaccounted_time = slice_used - cfqq->allocated_slice;
Vivek Goyalf75edf22009-12-03 12:59:53 -05001463 slice_used = cfqq->allocated_slice;
Justin TerAvest167400d2011-03-12 16:54:00 +01001464 }
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001465 if (cfqq->slice_start > cfqq->dispatch_start)
Justin TerAvest167400d2011-03-12 16:54:00 +01001466 *unaccounted_time += cfqq->slice_start -
1467 cfqq->dispatch_start;
Vivek Goyaldae739e2009-12-03 12:59:45 -05001468 }
1469
Vivek Goyaldae739e2009-12-03 12:59:45 -05001470 return slice_used;
1471}
1472
1473static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
Vivek Goyale5ff0822010-04-26 19:25:11 +02001474 struct cfq_queue *cfqq)
Vivek Goyaldae739e2009-12-03 12:59:45 -05001475{
1476 struct cfq_rb_root *st = &cfqd->grp_service_tree;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001477 u64 used_sl, charge, unaccounted_sl = 0;
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05001478 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
1479 - cfqg->service_tree_idle.count;
Tejun Heo1d3650f2013-01-09 08:05:11 -08001480 unsigned int vfr;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001481 u64 now = ktime_get_ns();
Vivek Goyaldae739e2009-12-03 12:59:45 -05001482
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05001483 BUG_ON(nr_sync < 0);
Justin TerAvest167400d2011-03-12 16:54:00 +01001484 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05001485
Vivek Goyal02b35082010-08-23 12:23:53 +02001486 if (iops_mode(cfqd))
1487 charge = cfqq->slice_dispatch;
1488 else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
1489 charge = cfqq->allocated_slice;
Vivek Goyaldae739e2009-12-03 12:59:45 -05001490
Tejun Heo1d3650f2013-01-09 08:05:11 -08001491 /*
1492 * Can't update vdisktime while on service tree and cfqg->vfraction
1493 * is valid only while on it. Cache vfr, leave the service tree,
1494 * update vdisktime and go back on. The re-addition to the tree
1495 * will also update the weights as necessary.
1496 */
1497 vfr = cfqg->vfraction;
Justin TerAvest8184f932011-03-17 16:12:36 +01001498 cfq_group_service_tree_del(st, cfqg);
Tejun Heo1d3650f2013-01-09 08:05:11 -08001499 cfqg->vdisktime += cfqg_scale_charge(charge, vfr);
Justin TerAvest8184f932011-03-17 16:12:36 +01001500 cfq_group_service_tree_add(st, cfqg);
Vivek Goyaldae739e2009-12-03 12:59:45 -05001501
1502 /* This group is being expired. Save the context */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001503 if (cfqd->workload_expires > now) {
1504 cfqg->saved_wl_slice = cfqd->workload_expires - now;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04001505 cfqg->saved_wl_type = cfqd->serving_wl_type;
1506 cfqg->saved_wl_class = cfqd->serving_wl_class;
Vivek Goyaldae739e2009-12-03 12:59:45 -05001507 } else
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04001508 cfqg->saved_wl_slice = 0;
Vivek Goyal2868ef72009-12-03 12:59:48 -05001509
1510 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
1511 st->min_vdisktime);
Joe Perchesfd16d262011-06-13 10:42:49 +02001512 cfq_log_cfqq(cfqq->cfqd, cfqq,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001513 "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
Joe Perchesfd16d262011-06-13 10:42:49 +02001514 used_sl, cfqq->slice_dispatch, charge,
1515 iops_mode(cfqd), cfqq->nr_sectors);
Tejun Heo155fead2012-04-01 14:38:44 -07001516 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
1517 cfqg_stats_set_start_empty_time(cfqg);
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001518}
1519
Tejun Heof51b8022012-03-05 13:15:05 -08001520/**
1521 * cfq_init_cfqg_base - initialize base part of a cfq_group
1522 * @cfqg: cfq_group to initialize
1523 *
1524 * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
1525 * is enabled or not.
1526 */
1527static void cfq_init_cfqg_base(struct cfq_group *cfqg)
1528{
1529 struct cfq_rb_root *st;
1530 int i, j;
1531
1532 for_each_cfqg_st(cfqg, i, j, st)
1533 *st = CFQ_RB_ROOT;
1534 RB_CLEAR_NODE(&cfqg->rb_node);
1535
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001536 cfqg->ttime.last_end_request = ktime_get_ns();
Tejun Heof51b8022012-03-05 13:15:05 -08001537}
1538
Vivek Goyal25fb5162009-12-03 12:59:46 -05001539#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo69d7fde2015-08-18 14:55:36 -07001540static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
1541 bool on_dfl, bool reset_dev, bool is_leaf_weight);
1542
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001543static void cfqg_stats_exit(struct cfqg_stats *stats)
Peter Zijlstra90d38392013-11-12 19:42:14 -08001544{
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001545 blkg_rwstat_exit(&stats->merged);
1546 blkg_rwstat_exit(&stats->service_time);
1547 blkg_rwstat_exit(&stats->wait_time);
1548 blkg_rwstat_exit(&stats->queued);
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001549 blkg_stat_exit(&stats->time);
1550#ifdef CONFIG_DEBUG_BLK_CGROUP
1551 blkg_stat_exit(&stats->unaccounted_time);
1552 blkg_stat_exit(&stats->avg_queue_size_sum);
1553 blkg_stat_exit(&stats->avg_queue_size_samples);
1554 blkg_stat_exit(&stats->dequeue);
1555 blkg_stat_exit(&stats->group_wait_time);
1556 blkg_stat_exit(&stats->idle_time);
1557 blkg_stat_exit(&stats->empty_time);
1558#endif
1559}
1560
1561static int cfqg_stats_init(struct cfqg_stats *stats, gfp_t gfp)
1562{
Tejun Heo77ea7332015-08-18 14:55:24 -07001563 if (blkg_rwstat_init(&stats->merged, gfp) ||
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001564 blkg_rwstat_init(&stats->service_time, gfp) ||
1565 blkg_rwstat_init(&stats->wait_time, gfp) ||
1566 blkg_rwstat_init(&stats->queued, gfp) ||
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001567 blkg_stat_init(&stats->time, gfp))
1568 goto err;
Peter Zijlstra90d38392013-11-12 19:42:14 -08001569
1570#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001571 if (blkg_stat_init(&stats->unaccounted_time, gfp) ||
1572 blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
1573 blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
1574 blkg_stat_init(&stats->dequeue, gfp) ||
1575 blkg_stat_init(&stats->group_wait_time, gfp) ||
1576 blkg_stat_init(&stats->idle_time, gfp) ||
1577 blkg_stat_init(&stats->empty_time, gfp))
1578 goto err;
Peter Zijlstra90d38392013-11-12 19:42:14 -08001579#endif
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001580 return 0;
1581err:
1582 cfqg_stats_exit(stats);
1583 return -ENOMEM;
Peter Zijlstra90d38392013-11-12 19:42:14 -08001584}
1585
Tejun Heoe4a9bde2015-08-18 14:55:16 -07001586static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp)
1587{
1588 struct cfq_group_data *cgd;
1589
Tejun Heoebc4ff62016-11-10 11:16:37 -05001590 cgd = kzalloc(sizeof(*cgd), gfp);
Tejun Heoe4a9bde2015-08-18 14:55:16 -07001591 if (!cgd)
1592 return NULL;
1593 return &cgd->cpd;
1594}
1595
Tejun Heo81437642015-08-18 14:55:15 -07001596static void cfq_cpd_init(struct blkcg_policy_data *cpd)
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001597{
Tejun Heo81437642015-08-18 14:55:15 -07001598 struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);
Tejun Heo9e10a132015-09-18 11:56:28 -04001599 unsigned int weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
Tejun Heo69d7fde2015-08-18 14:55:36 -07001600 CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001601
Tejun Heo69d7fde2015-08-18 14:55:36 -07001602 if (cpd_to_blkcg(cpd) == &blkcg_root)
1603 weight *= 2;
1604
1605 cgd->weight = weight;
1606 cgd->leaf_weight = weight;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001607}
1608
/* Free the cfq_group_data that embeds @cpd. */
static void cfq_cpd_free(struct blkcg_policy_data *cpd)
{
	struct cfq_group_data *data = cpd_to_cfqgd(cpd);

	kfree(data);
}
1613
Tejun Heo69d7fde2015-08-18 14:55:36 -07001614static void cfq_cpd_bind(struct blkcg_policy_data *cpd)
1615{
1616 struct blkcg *blkcg = cpd_to_blkcg(cpd);
Tejun Heo9e10a132015-09-18 11:56:28 -04001617 bool on_dfl = cgroup_subsys_on_dfl(io_cgrp_subsys);
Tejun Heo69d7fde2015-08-18 14:55:36 -07001618 unsigned int weight = on_dfl ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;
1619
1620 if (blkcg == &blkcg_root)
1621 weight *= 2;
1622
1623 WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, false));
1624 WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, true));
1625}
1626
Tejun Heo001bea72015-08-18 14:55:11 -07001627static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node)
1628{
Tejun Heob2ce2642015-08-18 14:55:13 -07001629 struct cfq_group *cfqg;
1630
1631 cfqg = kzalloc_node(sizeof(*cfqg), gfp, node);
1632 if (!cfqg)
1633 return NULL;
1634
1635 cfq_init_cfqg_base(cfqg);
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001636 if (cfqg_stats_init(&cfqg->stats, gfp)) {
1637 kfree(cfqg);
1638 return NULL;
1639 }
Tejun Heob2ce2642015-08-18 14:55:13 -07001640
1641 return &cfqg->pd;
Tejun Heo001bea72015-08-18 14:55:11 -07001642}
1643
Tejun Heoa9520cd2015-08-18 14:55:14 -07001644static void cfq_pd_init(struct blkg_policy_data *pd)
Vivek Goyalf469a7b2011-05-19 15:38:23 -04001645{
Tejun Heoa9520cd2015-08-18 14:55:14 -07001646 struct cfq_group *cfqg = pd_to_cfqg(pd);
1647 struct cfq_group_data *cgd = blkcg_to_cfqgd(pd->blkg->blkcg);
Vivek Goyal25fb5162009-12-03 12:59:46 -05001648
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001649 cfqg->weight = cgd->weight;
1650 cfqg->leaf_weight = cgd->leaf_weight;
Vivek Goyal25fb5162009-12-03 12:59:46 -05001651}
1652
Tejun Heoa9520cd2015-08-18 14:55:14 -07001653static void cfq_pd_offline(struct blkg_policy_data *pd)
Tejun Heo0b399202013-01-09 08:05:13 -08001654{
Tejun Heoa9520cd2015-08-18 14:55:14 -07001655 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo60a83702015-08-18 14:55:05 -07001656 int i;
1657
1658 for (i = 0; i < IOPRIO_BE_NR; i++) {
1659 if (cfqg->async_cfqq[0][i])
1660 cfq_put_queue(cfqg->async_cfqq[0][i]);
1661 if (cfqg->async_cfqq[1][i])
1662 cfq_put_queue(cfqg->async_cfqq[1][i]);
1663 }
1664
1665 if (cfqg->async_idle_cfqq)
1666 cfq_put_queue(cfqg->async_idle_cfqq);
1667
Tejun Heo0b399202013-01-09 08:05:13 -08001668 /*
1669 * @blkg is going offline and will be ignored by
1670 * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
1671 * that they don't get lost. If IOs complete after this point, the
1672 * stats for them will be lost. Oh well...
1673 */
Tejun Heo60a83702015-08-18 14:55:05 -07001674 cfqg_stats_xfer_dead(cfqg);
Tejun Heo0b399202013-01-09 08:05:13 -08001675}
1676
Tejun Heo001bea72015-08-18 14:55:11 -07001677static void cfq_pd_free(struct blkg_policy_data *pd)
1678{
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001679 struct cfq_group *cfqg = pd_to_cfqg(pd);
1680
1681 cfqg_stats_exit(&cfqg->stats);
1682 return kfree(cfqg);
Tejun Heo001bea72015-08-18 14:55:11 -07001683}
1684
Tejun Heoa9520cd2015-08-18 14:55:14 -07001685static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
Tejun Heo689665a2013-01-09 08:05:13 -08001686{
Tejun Heoa9520cd2015-08-18 14:55:14 -07001687 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo689665a2013-01-09 08:05:13 -08001688
1689 cfqg_stats_reset(&cfqg->stats);
Vivek Goyal25fb5162009-12-03 12:59:46 -05001690}
1691
Tejun Heoae118892015-08-18 14:55:20 -07001692static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
1693 struct blkcg *blkcg)
Vivek Goyal25fb5162009-12-03 12:59:46 -05001694{
Tejun Heoae118892015-08-18 14:55:20 -07001695 struct blkcg_gq *blkg;
Vivek Goyal25fb5162009-12-03 12:59:46 -05001696
Tejun Heoae118892015-08-18 14:55:20 -07001697 blkg = blkg_lookup(blkcg, cfqd->queue);
1698 if (likely(blkg))
1699 return blkg_to_cfqg(blkg);
1700 return NULL;
Vivek Goyal25fb5162009-12-03 12:59:46 -05001701}
1702
1703static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1704{
Vivek Goyal25fb5162009-12-03 12:59:46 -05001705 cfqq->cfqg = cfqg;
Vivek Goyalb1c35762009-12-03 12:59:47 -05001706 /* cfqq reference on cfqg */
Tejun Heoeb7d8c072012-03-23 14:02:53 +01001707 cfqg_get(cfqg);
Vivek Goyalb1c35762009-12-03 12:59:47 -05001708}
1709
Tejun Heof95a04a2012-04-16 13:57:26 -07001710static u64 cfqg_prfill_weight_device(struct seq_file *sf,
1711 struct blkg_policy_data *pd, int off)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001712{
Tejun Heof95a04a2012-04-16 13:57:26 -07001713 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo3381cb82012-04-01 14:38:44 -07001714
1715 if (!cfqg->dev_weight)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001716 return 0;
Tejun Heof95a04a2012-04-16 13:57:26 -07001717 return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001718}
1719
Tejun Heo2da8ca82013-12-05 12:28:04 -05001720static int cfqg_print_weight_device(struct seq_file *sf, void *v)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001721{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001722 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1723 cfqg_prfill_weight_device, &blkcg_policy_cfq,
1724 0, false);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001725 return 0;
1726}
1727
Tejun Heoe71357e2013-01-09 08:05:10 -08001728static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
1729 struct blkg_policy_data *pd, int off)
1730{
1731 struct cfq_group *cfqg = pd_to_cfqg(pd);
1732
1733 if (!cfqg->dev_leaf_weight)
1734 return 0;
1735 return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
1736}
1737
Tejun Heo2da8ca82013-12-05 12:28:04 -05001738static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
Tejun Heoe71357e2013-01-09 08:05:10 -08001739{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001740 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1741 cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
1742 0, false);
Tejun Heoe71357e2013-01-09 08:05:10 -08001743 return 0;
1744}
1745
Tejun Heo2da8ca82013-12-05 12:28:04 -05001746static int cfq_print_weight(struct seq_file *sf, void *v)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001747{
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001748 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
Jens Axboe9470e4a2015-06-19 10:19:36 -06001749 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1750 unsigned int val = 0;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001751
Jens Axboe9470e4a2015-06-19 10:19:36 -06001752 if (cgd)
1753 val = cgd->weight;
1754
1755 seq_printf(sf, "%u\n", val);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001756 return 0;
1757}
1758
Tejun Heo2da8ca82013-12-05 12:28:04 -05001759static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
Tejun Heoe71357e2013-01-09 08:05:10 -08001760{
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001761 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
Jens Axboe9470e4a2015-06-19 10:19:36 -06001762 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1763 unsigned int val = 0;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001764
Jens Axboe9470e4a2015-06-19 10:19:36 -06001765 if (cgd)
1766 val = cgd->leaf_weight;
1767
1768 seq_printf(sf, "%u\n", val);
Tejun Heoe71357e2013-01-09 08:05:10 -08001769 return 0;
1770}
1771
Tejun Heo451af502014-05-13 12:16:21 -04001772static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
1773 char *buf, size_t nbytes, loff_t off,
Tejun Heo2ee867dc2015-08-18 14:55:34 -07001774 bool on_dfl, bool is_leaf_weight)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001775{
Tejun Heo69d7fde2015-08-18 14:55:36 -07001776 unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
1777 unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
Tejun Heo451af502014-05-13 12:16:21 -04001778 struct blkcg *blkcg = css_to_blkcg(of_css(of));
Tejun Heo60c2bc22012-04-01 14:38:43 -07001779 struct blkg_conf_ctx ctx;
Tejun Heo3381cb82012-04-01 14:38:44 -07001780 struct cfq_group *cfqg;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001781 struct cfq_group_data *cfqgd;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001782 int ret;
Tejun Heo36aa9e52015-08-18 14:55:31 -07001783 u64 v;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001784
Tejun Heo3c798392012-04-16 13:57:25 -07001785 ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001786 if (ret)
1787 return ret;
1788
Tejun Heo2ee867dc2015-08-18 14:55:34 -07001789 if (sscanf(ctx.body, "%llu", &v) == 1) {
1790 /* require "default" on dfl */
1791 ret = -ERANGE;
1792 if (!v && on_dfl)
1793 goto out_finish;
1794 } else if (!strcmp(strim(ctx.body), "default")) {
1795 v = 0;
1796 } else {
1797 ret = -EINVAL;
Tejun Heo36aa9e52015-08-18 14:55:31 -07001798 goto out_finish;
Tejun Heo2ee867dc2015-08-18 14:55:34 -07001799 }
Tejun Heo36aa9e52015-08-18 14:55:31 -07001800
Tejun Heo3381cb82012-04-01 14:38:44 -07001801 cfqg = blkg_to_cfqg(ctx.blkg);
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001802 cfqgd = blkcg_to_cfqgd(blkcg);
Jens Axboeae994ea2015-06-20 10:26:50 -06001803
Tejun Heo20386ce2015-08-18 14:55:28 -07001804 ret = -ERANGE;
Tejun Heo69d7fde2015-08-18 14:55:36 -07001805 if (!v || (v >= min && v <= max)) {
Tejun Heoe71357e2013-01-09 08:05:10 -08001806 if (!is_leaf_weight) {
Tejun Heo36aa9e52015-08-18 14:55:31 -07001807 cfqg->dev_weight = v;
1808 cfqg->new_weight = v ?: cfqgd->weight;
Tejun Heoe71357e2013-01-09 08:05:10 -08001809 } else {
Tejun Heo36aa9e52015-08-18 14:55:31 -07001810 cfqg->dev_leaf_weight = v;
1811 cfqg->new_leaf_weight = v ?: cfqgd->leaf_weight;
Tejun Heoe71357e2013-01-09 08:05:10 -08001812 }
Tejun Heo60c2bc22012-04-01 14:38:43 -07001813 ret = 0;
1814 }
Tejun Heo36aa9e52015-08-18 14:55:31 -07001815out_finish:
Tejun Heo60c2bc22012-04-01 14:38:43 -07001816 blkg_conf_finish(&ctx);
Tejun Heo451af502014-05-13 12:16:21 -04001817 return ret ?: nbytes;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001818}
1819
Tejun Heo451af502014-05-13 12:16:21 -04001820static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
1821 char *buf, size_t nbytes, loff_t off)
Tejun Heoe71357e2013-01-09 08:05:10 -08001822{
Tejun Heo2ee867dc2015-08-18 14:55:34 -07001823 return __cfqg_set_weight_device(of, buf, nbytes, off, false, false);
Tejun Heoe71357e2013-01-09 08:05:10 -08001824}
1825
Tejun Heo451af502014-05-13 12:16:21 -04001826static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
1827 char *buf, size_t nbytes, loff_t off)
Tejun Heoe71357e2013-01-09 08:05:10 -08001828{
Tejun Heo2ee867dc2015-08-18 14:55:34 -07001829 return __cfqg_set_weight_device(of, buf, nbytes, off, false, true);
Tejun Heoe71357e2013-01-09 08:05:10 -08001830}
1831
Tejun Heodd165eb2015-08-18 14:55:33 -07001832static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
Tejun Heo69d7fde2015-08-18 14:55:36 -07001833 bool on_dfl, bool reset_dev, bool is_leaf_weight)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001834{
Tejun Heo69d7fde2015-08-18 14:55:36 -07001835 unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
1836 unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
Tejun Heo182446d2013-08-08 20:11:24 -04001837 struct blkcg *blkcg = css_to_blkcg(css);
Tejun Heo3c798392012-04-16 13:57:25 -07001838 struct blkcg_gq *blkg;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001839 struct cfq_group_data *cfqgd;
Jens Axboeae994ea2015-06-20 10:26:50 -06001840 int ret = 0;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001841
Tejun Heo69d7fde2015-08-18 14:55:36 -07001842 if (val < min || val > max)
1843 return -ERANGE;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001844
1845 spin_lock_irq(&blkcg->lock);
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001846 cfqgd = blkcg_to_cfqgd(blkcg);
Jens Axboeae994ea2015-06-20 10:26:50 -06001847 if (!cfqgd) {
1848 ret = -EINVAL;
1849 goto out;
1850 }
Tejun Heoe71357e2013-01-09 08:05:10 -08001851
1852 if (!is_leaf_weight)
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001853 cfqgd->weight = val;
Tejun Heoe71357e2013-01-09 08:05:10 -08001854 else
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001855 cfqgd->leaf_weight = val;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001856
Sasha Levinb67bfe02013-02-27 17:06:00 -08001857 hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
Tejun Heo3381cb82012-04-01 14:38:44 -07001858 struct cfq_group *cfqg = blkg_to_cfqg(blkg);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001859
Tejun Heoe71357e2013-01-09 08:05:10 -08001860 if (!cfqg)
1861 continue;
1862
1863 if (!is_leaf_weight) {
Tejun Heo69d7fde2015-08-18 14:55:36 -07001864 if (reset_dev)
1865 cfqg->dev_weight = 0;
Tejun Heoe71357e2013-01-09 08:05:10 -08001866 if (!cfqg->dev_weight)
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001867 cfqg->new_weight = cfqgd->weight;
Tejun Heoe71357e2013-01-09 08:05:10 -08001868 } else {
Tejun Heo69d7fde2015-08-18 14:55:36 -07001869 if (reset_dev)
1870 cfqg->dev_leaf_weight = 0;
Tejun Heoe71357e2013-01-09 08:05:10 -08001871 if (!cfqg->dev_leaf_weight)
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001872 cfqg->new_leaf_weight = cfqgd->leaf_weight;
Tejun Heoe71357e2013-01-09 08:05:10 -08001873 }
Tejun Heo60c2bc22012-04-01 14:38:43 -07001874 }
1875
Jens Axboeae994ea2015-06-20 10:26:50 -06001876out:
Tejun Heo60c2bc22012-04-01 14:38:43 -07001877 spin_unlock_irq(&blkcg->lock);
Jens Axboeae994ea2015-06-20 10:26:50 -06001878 return ret;
Tejun Heo60c2bc22012-04-01 14:38:43 -07001879}
1880
Tejun Heo182446d2013-08-08 20:11:24 -04001881static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
1882 u64 val)
Tejun Heoe71357e2013-01-09 08:05:10 -08001883{
Tejun Heo69d7fde2015-08-18 14:55:36 -07001884 return __cfq_set_weight(css, val, false, false, false);
Tejun Heoe71357e2013-01-09 08:05:10 -08001885}
1886
Tejun Heo182446d2013-08-08 20:11:24 -04001887static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
1888 struct cftype *cft, u64 val)
Tejun Heoe71357e2013-01-09 08:05:10 -08001889{
Tejun Heo69d7fde2015-08-18 14:55:36 -07001890 return __cfq_set_weight(css, val, false, false, true);
Tejun Heoe71357e2013-01-09 08:05:10 -08001891}
1892
Tejun Heo2da8ca82013-12-05 12:28:04 -05001893static int cfqg_print_stat(struct seq_file *sf, void *v)
Tejun Heo5bc4afb12012-04-01 14:38:45 -07001894{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001895 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
1896 &blkcg_policy_cfq, seq_cft(sf)->private, false);
Tejun Heo5bc4afb12012-04-01 14:38:45 -07001897 return 0;
1898}
1899
Tejun Heo2da8ca82013-12-05 12:28:04 -05001900static int cfqg_print_rwstat(struct seq_file *sf, void *v)
Tejun Heo5bc4afb12012-04-01 14:38:45 -07001901{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001902 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
1903 &blkcg_policy_cfq, seq_cft(sf)->private, true);
Tejun Heo5bc4afb12012-04-01 14:38:45 -07001904 return 0;
1905}
1906
Tejun Heo43114012013-01-09 08:05:13 -08001907static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
1908 struct blkg_policy_data *pd, int off)
1909{
Tejun Heof12c74c2015-08-18 14:55:23 -07001910 u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
1911 &blkcg_policy_cfq, off);
Tejun Heo43114012013-01-09 08:05:13 -08001912 return __blkg_prfill_u64(sf, pd, sum);
1913}
1914
1915static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
1916 struct blkg_policy_data *pd, int off)
1917{
Tejun Heof12c74c2015-08-18 14:55:23 -07001918 struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
1919 &blkcg_policy_cfq, off);
Tejun Heo43114012013-01-09 08:05:13 -08001920 return __blkg_prfill_rwstat(sf, pd, &sum);
1921}
1922
Tejun Heo2da8ca82013-12-05 12:28:04 -05001923static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
Tejun Heo43114012013-01-09 08:05:13 -08001924{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001925 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1926 cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
1927 seq_cft(sf)->private, false);
Tejun Heo43114012013-01-09 08:05:13 -08001928 return 0;
1929}
1930
Tejun Heo2da8ca82013-12-05 12:28:04 -05001931static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
Tejun Heo43114012013-01-09 08:05:13 -08001932{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001933 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1934 cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
1935 seq_cft(sf)->private, true);
Tejun Heo43114012013-01-09 08:05:13 -08001936 return 0;
1937}
1938
Tejun Heo702747c2015-08-18 14:55:25 -07001939static u64 cfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
1940 int off)
1941{
1942 u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
1943
1944 return __blkg_prfill_u64(sf, pd, sum >> 9);
1945}
1946
1947static int cfqg_print_stat_sectors(struct seq_file *sf, void *v)
1948{
1949 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1950 cfqg_prfill_sectors, &blkcg_policy_cfq, 0, false);
1951 return 0;
1952}
1953
1954static u64 cfqg_prfill_sectors_recursive(struct seq_file *sf,
1955 struct blkg_policy_data *pd, int off)
1956{
1957 struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
1958 offsetof(struct blkcg_gq, stat_bytes));
1959 u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
1960 atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
1961
1962 return __blkg_prfill_u64(sf, pd, sum >> 9);
1963}
1964
1965static int cfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
1966{
1967 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1968 cfqg_prfill_sectors_recursive, &blkcg_policy_cfq, 0,
1969 false);
1970 return 0;
1971}
1972
Tejun Heo60c2bc22012-04-01 14:38:43 -07001973#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heof95a04a2012-04-16 13:57:26 -07001974static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
1975 struct blkg_policy_data *pd, int off)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001976{
Tejun Heof95a04a2012-04-16 13:57:26 -07001977 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo155fead2012-04-01 14:38:44 -07001978 u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001979 u64 v = 0;
1980
1981 if (samples) {
Tejun Heo155fead2012-04-01 14:38:44 -07001982 v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum);
Anatol Pomozovf3cff252013-09-22 12:43:47 -06001983 v = div64_u64(v, samples);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001984 }
Tejun Heof95a04a2012-04-16 13:57:26 -07001985 __blkg_prfill_u64(sf, pd, v);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001986 return 0;
1987}
1988
1989/* print avg_queue_size */
Tejun Heo2da8ca82013-12-05 12:28:04 -05001990static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001991{
Tejun Heo2da8ca82013-12-05 12:28:04 -05001992 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1993 cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
1994 0, false);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001995 return 0;
1996}
1997#endif /* CONFIG_DEBUG_BLK_CGROUP */
1998
Tejun Heo880f50e2015-08-18 14:55:30 -07001999static struct cftype cfq_blkcg_legacy_files[] = {
Tejun Heo1d3650f2013-01-09 08:05:11 -08002000 /* on root, weight is mapped to leaf_weight */
Tejun Heo60c2bc22012-04-01 14:38:43 -07002001 {
2002 .name = "weight_device",
Tejun Heo1d3650f2013-01-09 08:05:11 -08002003 .flags = CFTYPE_ONLY_ON_ROOT,
Tejun Heo2da8ca82013-12-05 12:28:04 -05002004 .seq_show = cfqg_print_leaf_weight_device,
Tejun Heo451af502014-05-13 12:16:21 -04002005 .write = cfqg_set_leaf_weight_device,
Tejun Heo1d3650f2013-01-09 08:05:11 -08002006 },
2007 {
2008 .name = "weight",
2009 .flags = CFTYPE_ONLY_ON_ROOT,
Tejun Heo2da8ca82013-12-05 12:28:04 -05002010 .seq_show = cfq_print_leaf_weight,
Tejun Heo1d3650f2013-01-09 08:05:11 -08002011 .write_u64 = cfq_set_leaf_weight,
2012 },
2013
2014 /* no such mapping necessary for !roots */
2015 {
2016 .name = "weight_device",
2017 .flags = CFTYPE_NOT_ON_ROOT,
Tejun Heo2da8ca82013-12-05 12:28:04 -05002018 .seq_show = cfqg_print_weight_device,
Tejun Heo451af502014-05-13 12:16:21 -04002019 .write = cfqg_set_weight_device,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002020 },
2021 {
2022 .name = "weight",
Tejun Heoe71357e2013-01-09 08:05:10 -08002023 .flags = CFTYPE_NOT_ON_ROOT,
Tejun Heo2da8ca82013-12-05 12:28:04 -05002024 .seq_show = cfq_print_weight,
Tejun Heo3381cb82012-04-01 14:38:44 -07002025 .write_u64 = cfq_set_weight,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002026 },
Tejun Heo1d3650f2013-01-09 08:05:11 -08002027
2028 {
2029 .name = "leaf_weight_device",
Tejun Heo2da8ca82013-12-05 12:28:04 -05002030 .seq_show = cfqg_print_leaf_weight_device,
Tejun Heo451af502014-05-13 12:16:21 -04002031 .write = cfqg_set_leaf_weight_device,
Tejun Heoe71357e2013-01-09 08:05:10 -08002032 },
2033 {
2034 .name = "leaf_weight",
Tejun Heo2da8ca82013-12-05 12:28:04 -05002035 .seq_show = cfq_print_leaf_weight,
Tejun Heoe71357e2013-01-09 08:05:10 -08002036 .write_u64 = cfq_set_leaf_weight,
2037 },
2038
Tejun Heo43114012013-01-09 08:05:13 -08002039 /* statistics, covers only the tasks in the cfqg */
Tejun Heo60c2bc22012-04-01 14:38:43 -07002040 {
2041 .name = "time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002042 .private = offsetof(struct cfq_group, stats.time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002043 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002044 },
2045 {
2046 .name = "sectors",
Tejun Heo702747c2015-08-18 14:55:25 -07002047 .seq_show = cfqg_print_stat_sectors,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002048 },
2049 {
2050 .name = "io_service_bytes",
Tejun Heo77ea7332015-08-18 14:55:24 -07002051 .private = (unsigned long)&blkcg_policy_cfq,
2052 .seq_show = blkg_print_stat_bytes,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002053 },
2054 {
2055 .name = "io_serviced",
Tejun Heo77ea7332015-08-18 14:55:24 -07002056 .private = (unsigned long)&blkcg_policy_cfq,
2057 .seq_show = blkg_print_stat_ios,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002058 },
2059 {
2060 .name = "io_service_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002061 .private = offsetof(struct cfq_group, stats.service_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002062 .seq_show = cfqg_print_rwstat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002063 },
2064 {
2065 .name = "io_wait_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002066 .private = offsetof(struct cfq_group, stats.wait_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002067 .seq_show = cfqg_print_rwstat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002068 },
2069 {
2070 .name = "io_merged",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002071 .private = offsetof(struct cfq_group, stats.merged),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002072 .seq_show = cfqg_print_rwstat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002073 },
2074 {
2075 .name = "io_queued",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002076 .private = offsetof(struct cfq_group, stats.queued),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002077 .seq_show = cfqg_print_rwstat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002078 },
Tejun Heo43114012013-01-09 08:05:13 -08002079
2080 /* the same statictics which cover the cfqg and its descendants */
2081 {
2082 .name = "time_recursive",
2083 .private = offsetof(struct cfq_group, stats.time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002084 .seq_show = cfqg_print_stat_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002085 },
2086 {
2087 .name = "sectors_recursive",
Tejun Heo702747c2015-08-18 14:55:25 -07002088 .seq_show = cfqg_print_stat_sectors_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002089 },
2090 {
2091 .name = "io_service_bytes_recursive",
Tejun Heo77ea7332015-08-18 14:55:24 -07002092 .private = (unsigned long)&blkcg_policy_cfq,
2093 .seq_show = blkg_print_stat_bytes_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002094 },
2095 {
2096 .name = "io_serviced_recursive",
Tejun Heo77ea7332015-08-18 14:55:24 -07002097 .private = (unsigned long)&blkcg_policy_cfq,
2098 .seq_show = blkg_print_stat_ios_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002099 },
2100 {
2101 .name = "io_service_time_recursive",
2102 .private = offsetof(struct cfq_group, stats.service_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002103 .seq_show = cfqg_print_rwstat_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002104 },
2105 {
2106 .name = "io_wait_time_recursive",
2107 .private = offsetof(struct cfq_group, stats.wait_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002108 .seq_show = cfqg_print_rwstat_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002109 },
2110 {
2111 .name = "io_merged_recursive",
2112 .private = offsetof(struct cfq_group, stats.merged),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002113 .seq_show = cfqg_print_rwstat_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002114 },
2115 {
2116 .name = "io_queued_recursive",
2117 .private = offsetof(struct cfq_group, stats.queued),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002118 .seq_show = cfqg_print_rwstat_recursive,
Tejun Heo43114012013-01-09 08:05:13 -08002119 },
Tejun Heo60c2bc22012-04-01 14:38:43 -07002120#ifdef CONFIG_DEBUG_BLK_CGROUP
2121 {
2122 .name = "avg_queue_size",
Tejun Heo2da8ca82013-12-05 12:28:04 -05002123 .seq_show = cfqg_print_avg_queue_size,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002124 },
2125 {
2126 .name = "group_wait_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002127 .private = offsetof(struct cfq_group, stats.group_wait_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002128 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002129 },
2130 {
2131 .name = "idle_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002132 .private = offsetof(struct cfq_group, stats.idle_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002133 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002134 },
2135 {
2136 .name = "empty_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002137 .private = offsetof(struct cfq_group, stats.empty_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002138 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002139 },
2140 {
2141 .name = "dequeue",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002142 .private = offsetof(struct cfq_group, stats.dequeue),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002143 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002144 },
2145 {
2146 .name = "unaccounted_time",
Tejun Heo5bc4afb12012-04-01 14:38:45 -07002147 .private = offsetof(struct cfq_group, stats.unaccounted_time),
Tejun Heo2da8ca82013-12-05 12:28:04 -05002148 .seq_show = cfqg_print_stat,
Tejun Heo60c2bc22012-04-01 14:38:43 -07002149 },
2150#endif /* CONFIG_DEBUG_BLK_CGROUP */
2151 { } /* terminate */
2152};
Tejun Heo2ee867dc2015-08-18 14:55:34 -07002153
2154static int cfq_print_weight_on_dfl(struct seq_file *sf, void *v)
2155{
2156 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
2157 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
2158
2159 seq_printf(sf, "default %u\n", cgd->weight);
2160 blkcg_print_blkgs(sf, blkcg, cfqg_prfill_weight_device,
2161 &blkcg_policy_cfq, 0, false);
2162 return 0;
2163}
2164
2165static ssize_t cfq_set_weight_on_dfl(struct kernfs_open_file *of,
2166 char *buf, size_t nbytes, loff_t off)
2167{
2168 char *endp;
2169 int ret;
2170 u64 v;
2171
2172 buf = strim(buf);
2173
2174 /* "WEIGHT" or "default WEIGHT" sets the default weight */
2175 v = simple_strtoull(buf, &endp, 0);
2176 if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
Tejun Heo69d7fde2015-08-18 14:55:36 -07002177 ret = __cfq_set_weight(of_css(of), v, true, false, false);
Tejun Heo2ee867dc2015-08-18 14:55:34 -07002178 return ret ?: nbytes;
2179 }
2180
2181 /* "MAJ:MIN WEIGHT" */
2182 return __cfqg_set_weight_device(of, buf, nbytes, off, true, false);
2183}
2184
2185static struct cftype cfq_blkcg_files[] = {
2186 {
2187 .name = "weight",
2188 .flags = CFTYPE_NOT_ON_ROOT,
2189 .seq_show = cfq_print_weight_on_dfl,
2190 .write = cfq_set_weight_on_dfl,
2191 },
2192 { } /* terminate */
2193};
2194
Vivek Goyal25fb5162009-12-03 12:59:46 -05002195#else /* GROUP_IOSCHED */
Tejun Heoae118892015-08-18 14:55:20 -07002196static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
2197 struct blkcg *blkcg)
Vivek Goyal25fb5162009-12-03 12:59:46 -05002198{
Tejun Heof51b8022012-03-05 13:15:05 -08002199 return cfqd->root_group;
Vivek Goyal25fb5162009-12-03 12:59:46 -05002200}
Vivek Goyal7f1dc8a2010-04-21 17:44:16 +02002201
Vivek Goyal25fb5162009-12-03 12:59:46 -05002202static inline void
2203cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
2204 cfqq->cfqg = cfqg;
2205}
2206
2207#endif /* GROUP_IOSCHED */
2208
Jens Axboe498d3aa22007-04-26 12:54:48 +02002209/*
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002210 * The cfqd->service_trees holds all pending cfq_queue's that have
Jens Axboe498d3aa22007-04-26 12:54:48 +02002211 * requests waiting to be processed. It is sorted in the order that
2212 * we will service the queues.
2213 */
Jens Axboea36e71f2009-04-15 12:15:11 +02002214static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Jens Axboea6151c32009-10-07 20:02:57 +02002215 bool add_front)
Jens Axboed9e76202007-04-20 14:27:50 +02002216{
Jens Axboe08717142008-01-28 11:38:15 +01002217 struct rb_node **p, *parent;
2218 struct cfq_queue *__cfqq;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002219 u64 rb_key;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002220 struct cfq_rb_root *st;
Jens Axboe498d3aa22007-04-26 12:54:48 +02002221 int left;
Vivek Goyaldae739e2009-12-03 12:59:45 -05002222 int new_cfqq = 1;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002223 u64 now = ktime_get_ns();
Vivek Goyalae30c282009-12-03 12:59:55 -05002224
Vivek Goyal34b98d02012-10-03 16:56:58 -04002225 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
Jens Axboe08717142008-01-28 11:38:15 +01002226 if (cfq_class_idle(cfqq)) {
2227 rb_key = CFQ_IDLE_DELAY;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002228 parent = rb_last(&st->rb);
Jens Axboe08717142008-01-28 11:38:15 +01002229 if (parent && parent != &cfqq->rb_node) {
2230 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
2231 rb_key += __cfqq->rb_key;
2232 } else
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002233 rb_key += now;
Jens Axboe08717142008-01-28 11:38:15 +01002234 } else if (!add_front) {
Jens Axboeb9c89462009-10-06 20:53:44 +02002235 /*
2236 * Get our rb key offset. Subtract any residual slice
2237 * value carried from last service. A negative resid
2238 * count indicates slice overrun, and this should position
2239 * the next service time further away in the tree.
2240 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002241 rb_key = cfq_slice_offset(cfqd, cfqq) + now;
Jens Axboeb9c89462009-10-06 20:53:44 +02002242 rb_key -= cfqq->slice_resid;
Jens Axboeedd75ff2007-04-19 12:03:34 +02002243 cfqq->slice_resid = 0;
Corrado Zoccolo48e025e2009-10-05 08:49:23 +02002244 } else {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002245 rb_key = -NSEC_PER_SEC;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002246 __cfqq = cfq_rb_first(st);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002247 rb_key += __cfqq ? __cfqq->rb_key : now;
Corrado Zoccolo48e025e2009-10-05 08:49:23 +02002248 }
Jens Axboed9e76202007-04-20 14:27:50 +02002249
2250 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
Vivek Goyaldae739e2009-12-03 12:59:45 -05002251 new_cfqq = 0;
Jens Axboe99f96282007-02-05 11:56:25 +01002252 /*
Jens Axboed9e76202007-04-20 14:27:50 +02002253 * same position, nothing more to do
Jens Axboe99f96282007-02-05 11:56:25 +01002254 */
Vivek Goyal34b98d02012-10-03 16:56:58 -04002255 if (rb_key == cfqq->rb_key && cfqq->service_tree == st)
Jens Axboed9e76202007-04-20 14:27:50 +02002256 return;
Jens Axboe53b037442006-07-28 09:48:51 +02002257
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +01002258 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
2259 cfqq->service_tree = NULL;
Jens Axboe22e2c502005-06-27 10:55:12 +02002260 }
Jens Axboed9e76202007-04-20 14:27:50 +02002261
Jens Axboe498d3aa22007-04-26 12:54:48 +02002262 left = 1;
Jens Axboe08717142008-01-28 11:38:15 +01002263 parent = NULL;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002264 cfqq->service_tree = st;
2265 p = &st->rb.rb_node;
Jens Axboed9e76202007-04-20 14:27:50 +02002266 while (*p) {
2267 parent = *p;
2268 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
2269
Jens Axboe0c534e02007-04-18 20:01:57 +02002270 /*
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002271 * sort by key, that represents service time.
Jens Axboe0c534e02007-04-18 20:01:57 +02002272 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002273 if (rb_key < __cfqq->rb_key)
Vivek Goyal1f23f122012-10-03 16:57:00 -04002274 p = &parent->rb_left;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002275 else {
Vivek Goyal1f23f122012-10-03 16:57:00 -04002276 p = &parent->rb_right;
Jens Axboecc09e292007-04-26 12:53:50 +02002277 left = 0;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002278 }
Jens Axboed9e76202007-04-20 14:27:50 +02002279 }
2280
Jens Axboecc09e292007-04-26 12:53:50 +02002281 if (left)
Vivek Goyal34b98d02012-10-03 16:56:58 -04002282 st->left = &cfqq->rb_node;
Jens Axboecc09e292007-04-26 12:53:50 +02002283
Jens Axboed9e76202007-04-20 14:27:50 +02002284 cfqq->rb_key = rb_key;
2285 rb_link_node(&cfqq->rb_node, parent, p);
Vivek Goyal34b98d02012-10-03 16:56:58 -04002286 rb_insert_color(&cfqq->rb_node, &st->rb);
2287 st->count++;
Namhyung Kim20359f22011-05-24 10:23:22 +02002288 if (add_front || !new_cfqq)
Vivek Goyaldae739e2009-12-03 12:59:45 -05002289 return;
Justin TerAvest8184f932011-03-17 16:12:36 +01002290 cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291}
2292
Jens Axboea36e71f2009-04-15 12:15:11 +02002293static struct cfq_queue *
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002294cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
2295 sector_t sector, struct rb_node **ret_parent,
2296 struct rb_node ***rb_link)
Jens Axboea36e71f2009-04-15 12:15:11 +02002297{
Jens Axboea36e71f2009-04-15 12:15:11 +02002298 struct rb_node **p, *parent;
2299 struct cfq_queue *cfqq = NULL;
2300
2301 parent = NULL;
2302 p = &root->rb_node;
2303 while (*p) {
2304 struct rb_node **n;
2305
2306 parent = *p;
2307 cfqq = rb_entry(parent, struct cfq_queue, p_node);
2308
2309 /*
2310 * Sort strictly based on sector. Smallest to the left,
2311 * largest to the right.
2312 */
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002313 if (sector > blk_rq_pos(cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002314 n = &(*p)->rb_right;
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002315 else if (sector < blk_rq_pos(cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002316 n = &(*p)->rb_left;
2317 else
2318 break;
2319 p = n;
Jens Axboe3ac6c9f2009-04-23 12:14:56 +02002320 cfqq = NULL;
Jens Axboea36e71f2009-04-15 12:15:11 +02002321 }
2322
2323 *ret_parent = parent;
2324 if (rb_link)
2325 *rb_link = p;
Jens Axboe3ac6c9f2009-04-23 12:14:56 +02002326 return cfqq;
Jens Axboea36e71f2009-04-15 12:15:11 +02002327}
2328
2329static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2330{
Jens Axboea36e71f2009-04-15 12:15:11 +02002331 struct rb_node **p, *parent;
2332 struct cfq_queue *__cfqq;
2333
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002334 if (cfqq->p_root) {
2335 rb_erase(&cfqq->p_node, cfqq->p_root);
2336 cfqq->p_root = NULL;
2337 }
Jens Axboea36e71f2009-04-15 12:15:11 +02002338
2339 if (cfq_class_idle(cfqq))
2340 return;
2341 if (!cfqq->next_rq)
2342 return;
2343
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002344 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002345 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
2346 blk_rq_pos(cfqq->next_rq), &parent, &p);
Jens Axboe3ac6c9f2009-04-23 12:14:56 +02002347 if (!__cfqq) {
2348 rb_link_node(&cfqq->p_node, parent, p);
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002349 rb_insert_color(&cfqq->p_node, cfqq->p_root);
2350 } else
2351 cfqq->p_root = NULL;
Jens Axboea36e71f2009-04-15 12:15:11 +02002352}
2353
/*
 * Refresh @cfqq's position in both the service tree and the priority tree.
 * Does nothing unless the queue is flagged as being on the RR list.
 */
static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	/* only queues already on the RR list can be resorted */
	if (!cfq_cfqq_on_rr(cfqq))
		return;

	cfq_service_tree_add(cfqd, cfqq, 0);
	cfq_prio_tree_add(cfqd, cfqq);
}
2367
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368/*
2369 * add to busy list of queues for service, trying to be fair in ordering
Jens Axboe22e2c502005-06-27 10:55:12 +02002370 * the pending list according to last request service
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 */
Jens Axboefebffd62008-01-28 13:19:43 +01002372static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373{
Jens Axboe7b679132008-05-30 12:23:07 +02002374 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
Jens Axboe3b181522005-06-27 10:56:24 +02002375 BUG_ON(cfq_cfqq_on_rr(cfqq));
2376 cfq_mark_cfqq_on_rr(cfqq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377 cfqd->busy_queues++;
Shaohua Lief8a41d2011-03-07 09:26:29 +01002378 if (cfq_cfqq_sync(cfqq))
2379 cfqd->busy_sync_queues++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380
Jens Axboeedd75ff2007-04-19 12:03:34 +02002381 cfq_resort_rr_list(cfqd, cfqq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382}
2383
Jens Axboe498d3aa22007-04-26 12:54:48 +02002384/*
2385 * Called when the cfqq no longer has requests pending, remove it from
2386 * the service tree.
2387 */
Jens Axboefebffd62008-01-28 13:19:43 +01002388static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389{
Jens Axboe7b679132008-05-30 12:23:07 +02002390 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
Jens Axboe3b181522005-06-27 10:56:24 +02002391 BUG_ON(!cfq_cfqq_on_rr(cfqq));
2392 cfq_clear_cfqq_on_rr(cfqq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +01002394 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
2395 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
2396 cfqq->service_tree = NULL;
2397 }
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002398 if (cfqq->p_root) {
2399 rb_erase(&cfqq->p_node, cfqq->p_root);
2400 cfqq->p_root = NULL;
2401 }
Jens Axboed9e76202007-04-20 14:27:50 +02002402
Justin TerAvest8184f932011-03-17 16:12:36 +01002403 cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404 BUG_ON(!cfqd->busy_queues);
2405 cfqd->busy_queues--;
Shaohua Lief8a41d2011-03-07 09:26:29 +01002406 if (cfq_cfqq_sync(cfqq))
2407 cfqd->busy_sync_queues--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408}
2409
2410/*
2411 * rb tree support functions
2412 */
Jens Axboefebffd62008-01-28 13:19:43 +01002413static void cfq_del_rq_rb(struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414{
Jens Axboe5e705372006-07-13 12:39:25 +02002415 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Jens Axboe5e705372006-07-13 12:39:25 +02002416 const int sync = rq_is_sync(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417
Jens Axboeb4878f22005-10-20 16:42:29 +02002418 BUG_ON(!cfqq->queued[sync]);
2419 cfqq->queued[sync]--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420
Jens Axboe5e705372006-07-13 12:39:25 +02002421 elv_rb_del(&cfqq->sort_list, rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422
Vivek Goyalf04a6422009-12-03 12:59:40 -05002423 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
2424 /*
2425 * Queue will be deleted from service tree when we actually
2426 * expire it later. Right now just remove it from prio tree
2427 * as it is empty.
2428 */
2429 if (cfqq->p_root) {
2430 rb_erase(&cfqq->p_node, cfqq->p_root);
2431 cfqq->p_root = NULL;
2432 }
2433 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434}
2435
Jens Axboe5e705372006-07-13 12:39:25 +02002436static void cfq_add_rq_rb(struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437{
Jens Axboe5e705372006-07-13 12:39:25 +02002438 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439 struct cfq_data *cfqd = cfqq->cfqd;
Jeff Moyer796d5112011-06-02 21:19:05 +02002440 struct request *prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441
Jens Axboe5380a102006-07-13 12:37:56 +02002442 cfqq->queued[rq_is_sync(rq)]++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443
Jeff Moyer796d5112011-06-02 21:19:05 +02002444 elv_rb_add(&cfqq->sort_list, rq);
Jens Axboe5fccbf62006-10-31 14:21:55 +01002445
2446 if (!cfq_cfqq_on_rr(cfqq))
2447 cfq_add_cfqq_rr(cfqd, cfqq);
Jens Axboe5044eed2007-04-25 11:53:48 +02002448
2449 /*
2450 * check if this request is a better next-serve candidate
2451 */
Jens Axboea36e71f2009-04-15 12:15:11 +02002452 prev = cfqq->next_rq;
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01002453 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
Jens Axboea36e71f2009-04-15 12:15:11 +02002454
2455 /*
2456 * adjust priority tree position, if ->next_rq changes
2457 */
2458 if (prev != cfqq->next_rq)
2459 cfq_prio_tree_add(cfqd, cfqq);
2460
Jens Axboe5044eed2007-04-25 11:53:48 +02002461 BUG_ON(!cfqq->next_rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462}
2463
Jens Axboefebffd62008-01-28 13:19:43 +01002464static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465{
Jens Axboe5380a102006-07-13 12:37:56 +02002466 elv_rb_del(&cfqq->sort_list, rq);
2467 cfqq->queued[rq_is_sync(rq)]--;
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002468 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
Jens Axboe5e705372006-07-13 12:39:25 +02002469 cfq_add_rq_rb(rq);
Tejun Heo155fead2012-04-01 14:38:44 -07002470 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002471 rq->cmd_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472}
2473
Jens Axboe206dc692006-03-28 13:03:44 +02002474static struct request *
2475cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476{
Jens Axboe206dc692006-03-28 13:03:44 +02002477 struct task_struct *tsk = current;
Tejun Heoc5869802011-12-14 00:33:41 +01002478 struct cfq_io_cq *cic;
Jens Axboe206dc692006-03-28 13:03:44 +02002479 struct cfq_queue *cfqq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480
Jens Axboe4ac845a2008-01-24 08:44:49 +01002481 cic = cfq_cic_lookup(cfqd, tsk->io_context);
Vasily Tarasov91fac312007-04-25 12:29:51 +02002482 if (!cic)
2483 return NULL;
2484
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002485 cfqq = cic_to_cfqq(cic, op_is_sync(bio->bi_opf));
Kent Overstreetf73a1c72012-09-25 15:05:12 -07002486 if (cfqq)
2487 return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 return NULL;
2490}
2491
Jens Axboe165125e2007-07-24 09:28:11 +02002492static void cfq_activate_request(struct request_queue *q, struct request *rq)
Jens Axboeb4878f22005-10-20 16:42:29 +02002493{
2494 struct cfq_data *cfqd = q->elevator->elevator_data;
2495
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01002496 cfqd->rq_in_driver++;
Jens Axboe7b679132008-05-30 12:23:07 +02002497 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01002498 cfqd->rq_in_driver);
Jens Axboe25776e32006-06-01 10:12:26 +02002499
Tejun Heo5b936292009-05-07 22:24:38 +09002500 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
Jens Axboeb4878f22005-10-20 16:42:29 +02002501}
2502
Jens Axboe165125e2007-07-24 09:28:11 +02002503static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504{
Jens Axboe22e2c502005-06-27 10:55:12 +02002505 struct cfq_data *cfqd = q->elevator->elevator_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01002507 WARN_ON(!cfqd->rq_in_driver);
2508 cfqd->rq_in_driver--;
Jens Axboe7b679132008-05-30 12:23:07 +02002509 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01002510 cfqd->rq_in_driver);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511}
2512
Jens Axboeb4878f22005-10-20 16:42:29 +02002513static void cfq_remove_request(struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514{
Jens Axboe5e705372006-07-13 12:39:25 +02002515 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Jens Axboe21183b02006-07-13 12:33:14 +02002516
Jens Axboe5e705372006-07-13 12:39:25 +02002517 if (cfqq->next_rq == rq)
2518 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519
Jens Axboeb4878f22005-10-20 16:42:29 +02002520 list_del_init(&rq->queuelist);
Jens Axboe5e705372006-07-13 12:39:25 +02002521 cfq_del_rq_rb(rq);
Jens Axboe374f84a2006-07-23 01:42:19 +02002522
Aaron Carroll45333d52008-08-26 15:52:36 +02002523 cfqq->cfqd->rq_queued--;
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002524 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
Christoph Hellwig65299a32011-08-23 14:50:29 +02002525 if (rq->cmd_flags & REQ_PRIO) {
2526 WARN_ON(!cfqq->prio_pending);
2527 cfqq->prio_pending--;
Jens Axboeb53d1ed2011-08-19 08:34:48 +02002528 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529}
2530
Christoph Hellwig34fe7c02017-02-08 14:46:48 +01002531static enum elv_merge cfq_merge(struct request_queue *q, struct request **req,
Jens Axboe165125e2007-07-24 09:28:11 +02002532 struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533{
2534 struct cfq_data *cfqd = q->elevator->elevator_data;
2535 struct request *__rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536
Jens Axboe206dc692006-03-28 13:03:44 +02002537 __rq = cfq_find_rq_fmerge(cfqd, bio);
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07002538 if (__rq && elv_bio_merge_ok(__rq, bio)) {
Jens Axboe98170642006-07-28 09:23:08 +02002539 *req = __rq;
2540 return ELEVATOR_FRONT_MERGE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 }
2542
2543 return ELEVATOR_NO_MERGE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544}
2545
Jens Axboe165125e2007-07-24 09:28:11 +02002546static void cfq_merged_request(struct request_queue *q, struct request *req,
Christoph Hellwig34fe7c02017-02-08 14:46:48 +01002547 enum elv_merge type)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548{
Jens Axboe21183b02006-07-13 12:33:14 +02002549 if (type == ELEVATOR_FRONT_MERGE) {
Jens Axboe5e705372006-07-13 12:39:25 +02002550 struct cfq_queue *cfqq = RQ_CFQQ(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551
Jens Axboe5e705372006-07-13 12:39:25 +02002552 cfq_reposition_rq_rb(cfqq, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554}
2555
Divyesh Shah812d4022010-04-08 21:14:23 -07002556static void cfq_bio_merged(struct request_queue *q, struct request *req,
2557 struct bio *bio)
2558{
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002559 cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_opf);
Divyesh Shah812d4022010-04-08 21:14:23 -07002560}
2561
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562static void
Jens Axboe165125e2007-07-24 09:28:11 +02002563cfq_merged_requests(struct request_queue *q, struct request *rq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 struct request *next)
2565{
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01002566 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Shaohua Li4a0b75c2011-12-16 14:00:22 +01002567 struct cfq_data *cfqd = q->elevator->elevator_data;
2568
Jens Axboe22e2c502005-06-27 10:55:12 +02002569 /*
2570 * reposition in fifo if next is older than rq
2571 */
2572 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002573 next->fifo_time < rq->fifo_time &&
Shaohua Li3d106fba2012-11-06 12:39:51 +01002574 cfqq == RQ_CFQQ(next)) {
Jens Axboe22e2c502005-06-27 10:55:12 +02002575 list_move(&rq->queuelist, &next->queuelist);
Jan Kara8b4922d2014-02-24 16:39:52 +01002576 rq->fifo_time = next->fifo_time;
Jens Axboe30996f42009-10-05 11:03:39 +02002577 }
Jens Axboe22e2c502005-06-27 10:55:12 +02002578
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01002579 if (cfqq->next_rq == next)
2580 cfqq->next_rq = rq;
Jens Axboeb4878f22005-10-20 16:42:29 +02002581 cfq_remove_request(next);
Christoph Hellwigef295ec2016-10-28 08:48:16 -06002582 cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);
Shaohua Li4a0b75c2011-12-16 14:00:22 +01002583
2584 cfqq = RQ_CFQQ(next);
2585 /*
2586 * all requests of this queue are merged to other queues, delete it
2587 * from the service tree. If it's the active_queue,
2588 * cfq_dispatch_requests() will choose to expire it or do idle
2589 */
2590 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
2591 cfqq != cfqd->active_queue)
2592 cfq_del_cfqq_rr(cfqd, cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02002593}
2594
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07002595static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
2596 struct bio *bio)
Jens Axboeda775262006-12-20 11:04:12 +01002597{
2598 struct cfq_data *cfqd = q->elevator->elevator_data;
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002599 bool is_sync = op_is_sync(bio->bi_opf);
Tejun Heoc5869802011-12-14 00:33:41 +01002600 struct cfq_io_cq *cic;
Jens Axboeda775262006-12-20 11:04:12 +01002601 struct cfq_queue *cfqq;
Jens Axboeda775262006-12-20 11:04:12 +01002602
2603 /*
Jens Axboeec8acb62007-01-02 18:32:11 +01002604 * Disallow merge of a sync bio into an async request.
Jens Axboeda775262006-12-20 11:04:12 +01002605 */
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002606 if (is_sync && !rq_is_sync(rq))
Jens Axboea6151c32009-10-07 20:02:57 +02002607 return false;
Jens Axboeda775262006-12-20 11:04:12 +01002608
2609 /*
Tejun Heof1a4f4d2011-12-14 00:33:39 +01002610 * Lookup the cfqq that this bio will be queued with and allow
Tejun Heo07c2bd32012-02-08 09:19:42 +01002611 * merge only if rq is queued there.
Jens Axboeda775262006-12-20 11:04:12 +01002612 */
Tejun Heo07c2bd32012-02-08 09:19:42 +01002613 cic = cfq_cic_lookup(cfqd, current->io_context);
2614 if (!cic)
2615 return false;
Jens Axboe719d3402006-12-22 09:38:53 +01002616
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002617 cfqq = cic_to_cfqq(cic, is_sync);
Jens Axboea6151c32009-10-07 20:02:57 +02002618 return cfqq == RQ_CFQQ(rq);
Jens Axboeda775262006-12-20 11:04:12 +01002619}
2620
/*
 * Two requests may be merged only when they are queued on the same cfq
 * queue.
 */
static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
			      struct request *next)
{
	struct cfq_queue *rq_cfqq = RQ_CFQQ(rq);
	struct cfq_queue *next_cfqq = RQ_CFQQ(next);

	return rq_cfqq == next_cfqq;
}
2626
Divyesh Shah812df482010-04-08 21:15:35 -07002627static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2628{
Jan Kara91148322016-06-08 15:11:39 +02002629 hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
Tejun Heo155fead2012-04-01 14:38:44 -07002630 cfqg_stats_update_idle_time(cfqq->cfqg);
Divyesh Shah812df482010-04-08 21:15:35 -07002631}
2632
Jens Axboefebffd62008-01-28 13:19:43 +01002633static void __cfq_set_active_queue(struct cfq_data *cfqd,
2634 struct cfq_queue *cfqq)
Jens Axboe22e2c502005-06-27 10:55:12 +02002635{
2636 if (cfqq) {
Vivek Goyal3bf10fe2012-10-03 16:56:56 -04002637 cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d",
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04002638 cfqd->serving_wl_class, cfqd->serving_wl_type);
Tejun Heo155fead2012-04-01 14:38:44 -07002639 cfqg_stats_update_avg_queue_size(cfqq->cfqg);
Justin TerAvest62a37f62011-03-23 08:25:44 +01002640 cfqq->slice_start = 0;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002641 cfqq->dispatch_start = ktime_get_ns();
Justin TerAvest62a37f62011-03-23 08:25:44 +01002642 cfqq->allocated_slice = 0;
2643 cfqq->slice_end = 0;
2644 cfqq->slice_dispatch = 0;
2645 cfqq->nr_sectors = 0;
2646
2647 cfq_clear_cfqq_wait_request(cfqq);
2648 cfq_clear_cfqq_must_dispatch(cfqq);
2649 cfq_clear_cfqq_must_alloc_slice(cfqq);
2650 cfq_clear_cfqq_fifo_expire(cfqq);
2651 cfq_mark_cfqq_slice_new(cfqq);
2652
2653 cfq_del_timer(cfqd, cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02002654 }
2655
2656 cfqd->active_queue = cfqq;
2657}
2658
2659/*
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002660 * current cfqq expired its slice (or was too idle), select new one
2661 */
2662static void
2663__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Vivek Goyale5ff0822010-04-26 19:25:11 +02002664 bool timed_out)
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002665{
Jens Axboe7b679132008-05-30 12:23:07 +02002666 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
2667
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002668 if (cfq_cfqq_wait_request(cfqq))
Divyesh Shah812df482010-04-08 21:15:35 -07002669 cfq_del_timer(cfqd, cfqq);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002670
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002671 cfq_clear_cfqq_wait_request(cfqq);
Vivek Goyalf75edf22009-12-03 12:59:53 -05002672 cfq_clear_cfqq_wait_busy(cfqq);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002673
2674 /*
Shaohua Liae54abe2010-02-05 13:11:45 +01002675 * If this cfqq is shared between multiple processes, check to
2676 * make sure that those processes are still issuing I/Os within
2677 * the mean seek distance. If not, it may be time to break the
2678 * queues apart again.
2679 */
2680 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
2681 cfq_mark_cfqq_split_coop(cfqq);
2682
2683 /*
Jens Axboe6084cdd2007-04-23 08:25:00 +02002684 * store what was left of this slice, if the queue idled/timed out
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002685 */
Shaohua Lic553f8e2011-01-14 08:41:03 +01002686 if (timed_out) {
2687 if (cfq_cfqq_slice_new(cfqq))
Vivek Goyalba5bd522011-01-19 08:25:02 -07002688 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
Shaohua Lic553f8e2011-01-14 08:41:03 +01002689 else
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002690 cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
Jan Kara93fdf142016-06-28 09:04:00 +02002691 cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
Jens Axboe7b679132008-05-30 12:23:07 +02002692 }
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002693
Vivek Goyale5ff0822010-04-26 19:25:11 +02002694 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
Vivek Goyaldae739e2009-12-03 12:59:45 -05002695
Vivek Goyalf04a6422009-12-03 12:59:40 -05002696 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
2697 cfq_del_cfqq_rr(cfqd, cfqq);
2698
Jens Axboeedd75ff2007-04-19 12:03:34 +02002699 cfq_resort_rr_list(cfqd, cfqq);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002700
2701 if (cfqq == cfqd->active_queue)
2702 cfqd->active_queue = NULL;
2703
2704 if (cfqd->active_cic) {
Tejun Heo11a31222012-02-07 07:51:30 +01002705 put_io_context(cfqd->active_cic->icq.ioc);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002706 cfqd->active_cic = NULL;
2707 }
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002708}
2709
Vivek Goyale5ff0822010-04-26 19:25:11 +02002710static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002711{
2712 struct cfq_queue *cfqq = cfqd->active_queue;
2713
2714 if (cfqq)
Vivek Goyale5ff0822010-04-26 19:25:11 +02002715 __cfq_slice_expired(cfqd, cfqq, timed_out);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002716}
2717
Jens Axboe498d3aa22007-04-26 12:54:48 +02002718/*
2719 * Get next queue for service. Unless we have a queue preemption,
2720 * we'll simply select the first cfqq in the service tree.
2721 */
Jens Axboe6d048f52007-04-25 12:44:27 +02002722static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
Jens Axboe22e2c502005-06-27 10:55:12 +02002723{
Vivek Goyal34b98d02012-10-03 16:56:58 -04002724 struct cfq_rb_root *st = st_for(cfqd->serving_group,
2725 cfqd->serving_wl_class, cfqd->serving_wl_type);
Jens Axboeedd75ff2007-04-19 12:03:34 +02002726
Vivek Goyalf04a6422009-12-03 12:59:40 -05002727 if (!cfqd->rq_queued)
2728 return NULL;
2729
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05002730 /* There is nothing to dispatch */
Vivek Goyal34b98d02012-10-03 16:56:58 -04002731 if (!st)
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05002732 return NULL;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002733 if (RB_EMPTY_ROOT(&st->rb))
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002734 return NULL;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002735 return cfq_rb_first(st);
Jens Axboe6d048f52007-04-25 12:44:27 +02002736}
2737
Vivek Goyalf04a6422009-12-03 12:59:40 -05002738static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
2739{
Vivek Goyal25fb5162009-12-03 12:59:46 -05002740 struct cfq_group *cfqg;
Vivek Goyalf04a6422009-12-03 12:59:40 -05002741 struct cfq_queue *cfqq;
2742 int i, j;
2743 struct cfq_rb_root *st;
2744
2745 if (!cfqd->rq_queued)
2746 return NULL;
2747
Vivek Goyal25fb5162009-12-03 12:59:46 -05002748 cfqg = cfq_get_next_cfqg(cfqd);
2749 if (!cfqg)
2750 return NULL;
2751
Markus Elfring1cf41752017-01-21 22:44:07 +01002752 for_each_cfqg_st(cfqg, i, j, st) {
2753 cfqq = cfq_rb_first(st);
2754 if (cfqq)
Vivek Goyalf04a6422009-12-03 12:59:40 -05002755 return cfqq;
Markus Elfring1cf41752017-01-21 22:44:07 +01002756 }
Vivek Goyalf04a6422009-12-03 12:59:40 -05002757 return NULL;
2758}
2759
/*
 * Activate @cfqq, or, when the caller passes NULL, whatever queue
 * cfq_get_next_queue() selects.  Returns the queue that was activated,
 * which may itself be NULL.
 */
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
					      struct cfq_queue *cfqq)
{
	struct cfq_queue *chosen = cfqq ? cfqq : cfq_get_next_queue(cfqd);

	__cfq_set_active_queue(cfqd, chosen);
	return chosen;
}
2772
Jens Axboed9e76202007-04-20 14:27:50 +02002773static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
2774 struct request *rq)
2775{
Tejun Heo83096eb2009-05-07 22:24:39 +09002776 if (blk_rq_pos(rq) >= cfqd->last_position)
2777 return blk_rq_pos(rq) - cfqd->last_position;
Jens Axboed9e76202007-04-20 14:27:50 +02002778 else
Tejun Heo83096eb2009-05-07 22:24:39 +09002779 return cfqd->last_position - blk_rq_pos(rq);
Jens Axboed9e76202007-04-20 14:27:50 +02002780}
2781
Jeff Moyerb2c18e12009-10-23 17:14:49 -04002782static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Shaohua Lie9ce3352010-03-19 08:03:04 +01002783 struct request *rq)
Jens Axboe6d048f52007-04-25 12:44:27 +02002784{
Shaohua Lie9ce3352010-03-19 08:03:04 +01002785 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
Jens Axboe6d048f52007-04-25 12:44:27 +02002786}
2787
Jens Axboea36e71f2009-04-15 12:15:11 +02002788static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
2789 struct cfq_queue *cur_cfqq)
Jens Axboe6d048f52007-04-25 12:44:27 +02002790{
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002791 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
Jens Axboea36e71f2009-04-15 12:15:11 +02002792 struct rb_node *parent, *node;
2793 struct cfq_queue *__cfqq;
2794 sector_t sector = cfqd->last_position;
2795
2796 if (RB_EMPTY_ROOT(root))
2797 return NULL;
2798
2799 /*
2800 * First, if we find a request starting at the end of the last
2801 * request, choose it.
2802 */
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002803 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
Jens Axboea36e71f2009-04-15 12:15:11 +02002804 if (__cfqq)
2805 return __cfqq;
2806
2807 /*
2808 * If the exact sector wasn't found, the parent of the NULL leaf
2809 * will contain the closest sector.
2810 */
2811 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
Shaohua Lie9ce3352010-03-19 08:03:04 +01002812 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002813 return __cfqq;
2814
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002815 if (blk_rq_pos(__cfqq->next_rq) < sector)
Jens Axboea36e71f2009-04-15 12:15:11 +02002816 node = rb_next(&__cfqq->p_node);
2817 else
2818 node = rb_prev(&__cfqq->p_node);
2819 if (!node)
2820 return NULL;
2821
2822 __cfqq = rb_entry(node, struct cfq_queue, p_node);
Shaohua Lie9ce3352010-03-19 08:03:04 +01002823 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002824 return __cfqq;
2825
2826 return NULL;
2827}
2828
2829/*
2830 * cfqd - obvious
2831 * cur_cfqq - passed in so that we don't decide that the current queue is
2832 * closely cooperating with itself.
2833 *
2834 * So, basically we're assuming that that cur_cfqq has dispatched at least
2835 * one request, and that cfqd->last_position reflects a position on the disk
2836 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
2837 * assumption.
2838 */
2839static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
Jeff Moyerb3b6d042009-10-23 17:14:51 -04002840 struct cfq_queue *cur_cfqq)
Jens Axboea36e71f2009-04-15 12:15:11 +02002841{
2842 struct cfq_queue *cfqq;
2843
Divyesh Shah39c01b22010-03-25 15:45:57 +01002844 if (cfq_class_idle(cur_cfqq))
2845 return NULL;
Jeff Moyere6c5bc72009-10-23 17:14:52 -04002846 if (!cfq_cfqq_sync(cur_cfqq))
2847 return NULL;
2848 if (CFQQ_SEEKY(cur_cfqq))
2849 return NULL;
2850
Jens Axboea36e71f2009-04-15 12:15:11 +02002851 /*
Gui Jianfengb9d8f4c2009-12-08 08:54:17 +01002852 * Don't search priority tree if it's the only queue in the group.
2853 */
2854 if (cur_cfqq->cfqg->nr_cfqq == 1)
2855 return NULL;
2856
2857 /*
Jens Axboed9e76202007-04-20 14:27:50 +02002858 * We should notice if some of the queues are cooperating, eg
2859 * working closely on the same area of the disk. In that case,
2860 * we can group them together and don't waste time idling.
Jens Axboe6d048f52007-04-25 12:44:27 +02002861 */
Jens Axboea36e71f2009-04-15 12:15:11 +02002862 cfqq = cfqq_close(cfqd, cur_cfqq);
2863 if (!cfqq)
2864 return NULL;
2865
Vivek Goyal8682e1f2009-12-03 12:59:50 -05002866 /* If new queue belongs to different cfq_group, don't choose it */
2867 if (cur_cfqq->cfqg != cfqq->cfqg)
2868 return NULL;
2869
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04002870 /*
2871 * It only makes sense to merge sync queues.
2872 */
2873 if (!cfq_cfqq_sync(cfqq))
2874 return NULL;
Jeff Moyere6c5bc72009-10-23 17:14:52 -04002875 if (CFQQ_SEEKY(cfqq))
2876 return NULL;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04002877
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002878 /*
2879 * Do not merge queues of different priority classes
2880 */
2881 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
2882 return NULL;
2883
Jens Axboea36e71f2009-04-15 12:15:11 +02002884 return cfqq;
Jens Axboe6d048f52007-04-25 12:44:27 +02002885}
2886
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002887/*
2888 * Determine whether we should enforce idle window for this queue.
2889 */
2890
2891static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2892{
Vivek Goyal3bf10fe2012-10-03 16:56:56 -04002893 enum wl_class_t wl_class = cfqq_class(cfqq);
Vivek Goyal34b98d02012-10-03 16:56:58 -04002894 struct cfq_rb_root *st = cfqq->service_tree;
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002895
Vivek Goyal34b98d02012-10-03 16:56:58 -04002896 BUG_ON(!st);
2897 BUG_ON(!st->count);
Vivek Goyalf04a6422009-12-03 12:59:40 -05002898
Vivek Goyalb6508c12010-08-23 12:23:33 +02002899 if (!cfqd->cfq_slice_idle)
2900 return false;
2901
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002902 /* We never do for idle class queues. */
Vivek Goyal3bf10fe2012-10-03 16:56:56 -04002903 if (wl_class == IDLE_WORKLOAD)
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002904 return false;
2905
2906 /* We do for queues that were marked with idle window flag. */
Shaohua Li3c764b72009-12-04 13:12:06 +01002907 if (cfq_cfqq_idle_window(cfqq) &&
2908 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002909 return true;
2910
2911 /*
2912 * Otherwise, we do only if they are the last ones
2913 * in their service tree.
2914 */
Vivek Goyal34b98d02012-10-03 16:56:58 -04002915 if (st->count == 1 && cfq_cfqq_sync(cfqq) &&
2916 !cfq_io_thinktime_big(cfqd, &st->ttime, false))
Shaohua Lic1e44752010-11-08 15:01:02 +01002917 return true;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002918 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count);
Shaohua Lic1e44752010-11-08 15:01:02 +01002919 return false;
Corrado Zoccoloa6d44e92009-10-26 22:45:11 +01002920}
2921
Jens Axboe6d048f52007-04-25 12:44:27 +02002922static void cfq_arm_slice_timer(struct cfq_data *cfqd)
Jens Axboe22e2c502005-06-27 10:55:12 +02002923{
Jens Axboe17926692007-01-19 11:59:30 +11002924 struct cfq_queue *cfqq = cfqd->active_queue;
Jan Karae7954212016-01-12 16:24:15 +01002925 struct cfq_rb_root *st = cfqq->service_tree;
Tejun Heoc5869802011-12-14 00:33:41 +01002926 struct cfq_io_cq *cic;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002927 u64 sl, group_idle = 0;
2928 u64 now = ktime_get_ns();
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002929
Jens Axboea68bbddba2008-09-24 13:03:33 +02002930 /*
Jens Axboef7d7b7a2008-09-25 11:37:50 +02002931 * SSD device without seek penalty, disable idling. But only do so
2932 * for devices that support queuing, otherwise we still have a problem
2933 * with sync vs async workloads.
Jens Axboea68bbddba2008-09-24 13:03:33 +02002934 */
Jens Axboef7d7b7a2008-09-25 11:37:50 +02002935 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
Jens Axboea68bbddba2008-09-24 13:03:33 +02002936 return;
2937
Jens Axboedd67d052006-06-21 09:36:18 +02002938 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
Jens Axboe6d048f52007-04-25 12:44:27 +02002939 WARN_ON(cfq_cfqq_slice_new(cfqq));
Jens Axboe22e2c502005-06-27 10:55:12 +02002940
2941 /*
2942 * idle is disabled, either manually or by past process history
2943 */
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02002944 if (!cfq_should_idle(cfqd, cfqq)) {
2945 /* no queue idling. Check for group idling */
2946 if (cfqd->cfq_group_idle)
2947 group_idle = cfqd->cfq_group_idle;
2948 else
2949 return;
2950 }
Jens Axboe6d048f52007-04-25 12:44:27 +02002951
Jens Axboe22e2c502005-06-27 10:55:12 +02002952 /*
Corrado Zoccolo8e550632009-11-26 10:02:58 +01002953 * still active requests from this queue, don't idle
Jens Axboe7b679132008-05-30 12:23:07 +02002954 */
Corrado Zoccolo8e550632009-11-26 10:02:58 +01002955 if (cfqq->dispatched)
Jens Axboe7b679132008-05-30 12:23:07 +02002956 return;
2957
2958 /*
Jens Axboe22e2c502005-06-27 10:55:12 +02002959 * task has exited, don't wait
2960 */
Jens Axboe206dc692006-03-28 13:03:44 +02002961 cic = cfqd->active_cic;
Tejun Heof6e8d012012-03-05 13:15:26 -08002962 if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
Jens Axboe6d048f52007-04-25 12:44:27 +02002963 return;
2964
Corrado Zoccolo355b6592009-10-08 08:43:32 +02002965 /*
2966 * If our average think time is larger than the remaining time
2967 * slice, then don't idle. This avoids overrunning the allotted
2968 * time slice.
2969 */
Shaohua Li383cd722011-07-12 14:24:35 +02002970 if (sample_valid(cic->ttime.ttime_samples) &&
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002971 (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
2972 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
Shaohua Li383cd722011-07-12 14:24:35 +02002973 cic->ttime.ttime_mean);
Corrado Zoccolo355b6592009-10-08 08:43:32 +02002974 return;
Divyesh Shahb1ffe732010-03-25 15:45:03 +01002975 }
Corrado Zoccolo355b6592009-10-08 08:43:32 +02002976
Jan Karae7954212016-01-12 16:24:15 +01002977 /*
2978 * There are other queues in the group or this is the only group and
2979 * it has too big thinktime, don't do group idle.
2980 */
2981 if (group_idle &&
2982 (cfqq->cfqg->nr_cfqq > 1 ||
2983 cfq_io_thinktime_big(cfqd, &st->ttime, true)))
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02002984 return;
2985
Jens Axboe3b181522005-06-27 10:56:24 +02002986 cfq_mark_cfqq_wait_request(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02002987
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02002988 if (group_idle)
2989 sl = cfqd->cfq_group_idle;
2990 else
2991 sl = cfqd->cfq_slice_idle;
Jens Axboe206dc692006-03-28 13:03:44 +02002992
Jan Kara91148322016-06-08 15:11:39 +02002993 hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
2994 HRTIMER_MODE_REL);
Tejun Heo155fead2012-04-01 14:38:44 -07002995 cfqg_stats_set_start_idle_time(cfqq->cfqg);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06002996 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02002997 group_idle ? 1 : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998}
2999
Jens Axboe498d3aa22007-04-26 12:54:48 +02003000/*
3001 * Move request from internal lists to the request queue dispatch list.
3002 */
Jens Axboe165125e2007-07-24 09:28:11 +02003003static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004{
Jens Axboe3ed9a292007-04-23 08:33:33 +02003005 struct cfq_data *cfqd = q->elevator->elevator_data;
Jens Axboe5e705372006-07-13 12:39:25 +02003006 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Jens Axboe22e2c502005-06-27 10:55:12 +02003007
Jens Axboe7b679132008-05-30 12:23:07 +02003008 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
3009
Jeff Moyer06d21882009-09-11 17:08:59 +02003010 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
Jens Axboe5380a102006-07-13 12:37:56 +02003011 cfq_remove_request(rq);
Jens Axboe6d048f52007-04-25 12:44:27 +02003012 cfqq->dispatched++;
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02003013 (RQ_CFQG(rq))->dispatched++;
Jens Axboe5380a102006-07-13 12:37:56 +02003014 elv_dispatch_sort(q, rq);
Jens Axboe3ed9a292007-04-23 08:33:33 +02003015
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01003016 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
Vivek Goyalc4e78932010-08-23 12:25:03 +02003017 cfqq->nr_sectors += blk_rq_sectors(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003018}
3019
3020/*
3021 * return expired entry, or NULL to just start from scratch in rbtree
3022 */
Jens Axboefebffd62008-01-28 13:19:43 +01003023static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003024{
Jens Axboe30996f42009-10-05 11:03:39 +02003025 struct request *rq = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026
Jens Axboe3b181522005-06-27 10:56:24 +02003027 if (cfq_cfqq_fifo_expire(cfqq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028 return NULL;
Jens Axboecb887412007-01-19 12:01:16 +11003029
3030 cfq_mark_cfqq_fifo_expire(cfqq);
3031
Jens Axboe89850f72006-07-22 16:48:31 +02003032 if (list_empty(&cfqq->fifo))
3033 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003034
Jens Axboe89850f72006-07-22 16:48:31 +02003035 rq = rq_entry_fifo(cfqq->fifo.next);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003036 if (ktime_get_ns() < rq->fifo_time)
Jens Axboe7b679132008-05-30 12:23:07 +02003037 rq = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003038
Jens Axboe6d048f52007-04-25 12:44:27 +02003039 return rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003040}
3041
Jens Axboe22e2c502005-06-27 10:55:12 +02003042static inline int
3043cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3044{
3045 const int base_rq = cfqd->cfq_slice_async_rq;
3046
3047 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
3048
Namhyung Kimb9f8ce02011-05-24 10:23:21 +02003049 return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
Jens Axboe22e2c502005-06-27 10:55:12 +02003050}
3051
3052/*
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003053 * Must be called with the queue_lock held.
3054 */
3055static int cfqq_process_refs(struct cfq_queue *cfqq)
3056{
3057 int process_refs, io_refs;
3058
3059 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
Shaohua Li30d7b942011-01-07 08:46:59 +01003060 process_refs = cfqq->ref - io_refs;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003061 BUG_ON(process_refs < 0);
3062 return process_refs;
3063}
3064
3065static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
3066{
Jeff Moyere6c5bc72009-10-23 17:14:52 -04003067 int process_refs, new_process_refs;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003068 struct cfq_queue *__cfqq;
3069
Jeff Moyerc10b61f2010-06-17 10:19:11 -04003070 /*
3071 * If there are no process references on the new_cfqq, then it is
3072 * unsafe to follow the ->new_cfqq chain as other cfqq's in the
3073 * chain may have dropped their last reference (not just their
3074 * last process reference).
3075 */
3076 if (!cfqq_process_refs(new_cfqq))
3077 return;
3078
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003079 /* Avoid a circular list and skip interim queue merges */
3080 while ((__cfqq = new_cfqq->new_cfqq)) {
3081 if (__cfqq == cfqq)
3082 return;
3083 new_cfqq = __cfqq;
3084 }
3085
3086 process_refs = cfqq_process_refs(cfqq);
Jeff Moyerc10b61f2010-06-17 10:19:11 -04003087 new_process_refs = cfqq_process_refs(new_cfqq);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003088 /*
3089 * If the process for the cfqq has gone away, there is no
3090 * sense in merging the queues.
3091 */
Jeff Moyerc10b61f2010-06-17 10:19:11 -04003092 if (process_refs == 0 || new_process_refs == 0)
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003093 return;
3094
Jeff Moyere6c5bc72009-10-23 17:14:52 -04003095 /*
3096 * Merge in the direction of the lesser amount of work.
3097 */
Jeff Moyere6c5bc72009-10-23 17:14:52 -04003098 if (new_process_refs >= process_refs) {
3099 cfqq->new_cfqq = new_cfqq;
Shaohua Li30d7b942011-01-07 08:46:59 +01003100 new_cfqq->ref += process_refs;
Jeff Moyere6c5bc72009-10-23 17:14:52 -04003101 } else {
3102 new_cfqq->new_cfqq = cfqq;
Shaohua Li30d7b942011-01-07 08:46:59 +01003103 cfqq->ref += new_process_refs;
Jeff Moyere6c5bc72009-10-23 17:14:52 -04003104 }
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003105}
3106
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003107static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
Vivek Goyal3bf10fe2012-10-03 16:56:56 -04003108 struct cfq_group *cfqg, enum wl_class_t wl_class)
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003109{
3110 struct cfq_queue *queue;
3111 int i;
3112 bool key_valid = false;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003113 u64 lowest_key = 0;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003114 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
3115
Vivek Goyal65b32a52009-12-16 17:52:59 -05003116 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
3117 /* select the one with lowest rb_key */
Vivek Goyal34b98d02012-10-03 16:56:58 -04003118 queue = cfq_rb_first(st_for(cfqg, wl_class, i));
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003119 if (queue &&
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003120 (!key_valid || queue->rb_key < lowest_key)) {
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003121 lowest_key = queue->rb_key;
3122 cur_best = i;
3123 key_valid = true;
3124 }
3125 }
3126
3127 return cur_best;
3128}
3129
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003130static void
3131choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003132{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003133 u64 slice;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003134 unsigned count;
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003135 struct cfq_rb_root *st;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003136 u64 group_slice;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003137 enum wl_class_t original_class = cfqd->serving_wl_class;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003138 u64 now = ktime_get_ns();
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003139
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003140 /* Choose next priority. RT > BE > IDLE */
Vivek Goyal58ff82f2009-12-03 12:59:44 -05003141 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003142 cfqd->serving_wl_class = RT_WORKLOAD;
Vivek Goyal58ff82f2009-12-03 12:59:44 -05003143 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003144 cfqd->serving_wl_class = BE_WORKLOAD;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003145 else {
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003146 cfqd->serving_wl_class = IDLE_WORKLOAD;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003147 cfqd->workload_expires = now + jiffies_to_nsecs(1);
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003148 return;
3149 }
3150
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003151 if (original_class != cfqd->serving_wl_class)
Shaohua Li writese4ea0c12010-12-13 14:32:22 +01003152 goto new_workload;
3153
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003154 /*
3155 * For RT and BE, we have to choose also the type
3156 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
3157 * expiration time
3158 */
Vivek Goyal34b98d02012-10-03 16:56:58 -04003159 st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003160 count = st->count;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003161
3162 /*
Vivek Goyal65b32a52009-12-16 17:52:59 -05003163 * check workload expiration, and that we still have other queues ready
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003164 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003165 if (count && !(now > cfqd->workload_expires))
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003166 return;
3167
Shaohua Li writese4ea0c12010-12-13 14:32:22 +01003168new_workload:
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003169 /* otherwise select new workload type */
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003170 cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg,
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003171 cfqd->serving_wl_class);
Vivek Goyal34b98d02012-10-03 16:56:58 -04003172 st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003173 count = st->count;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003174
3175 /*
3176 * the workload slice is computed as a fraction of target latency
3177 * proportional to the number of queues in that workload, over
3178 * all the queues in the same priority class
3179 */
Vivek Goyal58ff82f2009-12-03 12:59:44 -05003180 group_slice = cfq_group_slice(cfqd, cfqg);
3181
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003182 slice = div_u64(group_slice * count,
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003183 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
3184 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003185 cfqg)));
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003186
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003187 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003188 u64 tmp;
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05003189
3190 /*
3191 * Async queues are currently system wide. Just taking
3192 * proportion of queues with-in same group will lead to higher
3193 * async ratio system wide as generally root group is going
3194 * to have higher weight. A more accurate thing would be to
3195 * calculate system wide asnc/sync ratio.
3196 */
Tao Ma5bf14c02012-04-01 14:33:39 -07003197 tmp = cfqd->cfq_target_latency *
3198 cfqg_busy_async_queues(cfqd, cfqg);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003199 tmp = div_u64(tmp, cfqd->busy_queues);
3200 slice = min_t(u64, slice, tmp);
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05003201
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003202 /* async workload slice is scaled down according to
3203 * the sync/async slice ratio. */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003204 slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
Vivek Goyalf26bd1f2009-12-03 12:59:54 -05003205 } else
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003206 /* sync workload slice is at least 2 * cfq_slice_idle */
3207 slice = max(slice, 2 * cfqd->cfq_slice_idle);
3208
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003209 slice = max_t(u64, slice, CFQ_MIN_TT);
3210 cfq_log(cfqd, "workload slice:%llu", slice);
3211 cfqd->workload_expires = now + slice;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003212}
3213
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003214static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
3215{
3216 struct cfq_rb_root *st = &cfqd->grp_service_tree;
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003217 struct cfq_group *cfqg;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003218
3219 if (RB_EMPTY_ROOT(&st->rb))
3220 return NULL;
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003221 cfqg = cfq_rb_first_group(st);
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003222 update_min_vdisktime(st);
3223 return cfqg;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003224}
3225
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003226static void cfq_choose_cfqg(struct cfq_data *cfqd)
3227{
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003228 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003229 u64 now = ktime_get_ns();
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003230
3231 cfqd->serving_group = cfqg;
Vivek Goyaldae739e2009-12-03 12:59:45 -05003232
3233 /* Restore the workload type data */
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003234 if (cfqg->saved_wl_slice) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003235 cfqd->workload_expires = now + cfqg->saved_wl_slice;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003236 cfqd->serving_wl_type = cfqg->saved_wl_type;
3237 cfqd->serving_wl_class = cfqg->saved_wl_class;
Gui Jianfeng66ae2912009-12-15 10:08:45 +01003238 } else
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003239 cfqd->workload_expires = now - 1;
Gui Jianfeng66ae2912009-12-15 10:08:45 +01003240
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003241 choose_wl_class_and_type(cfqd, cfqg);
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003242}
3243
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003244/*
Jens Axboe498d3aa22007-04-26 12:54:48 +02003245 * Select a queue for service. If we have a current active queue,
3246 * check whether to continue servicing it, or retrieve and set a new one.
Jens Axboe22e2c502005-06-27 10:55:12 +02003247 */
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003248static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
Jens Axboe22e2c502005-06-27 10:55:12 +02003249{
Jens Axboea36e71f2009-04-15 12:15:11 +02003250 struct cfq_queue *cfqq, *new_cfqq = NULL;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003251 u64 now = ktime_get_ns();
Jens Axboe22e2c502005-06-27 10:55:12 +02003252
3253 cfqq = cfqd->active_queue;
3254 if (!cfqq)
3255 goto new_queue;
3256
Vivek Goyalf04a6422009-12-03 12:59:40 -05003257 if (!cfqd->rq_queued)
3258 return NULL;
Vivek Goyalc244bb52009-12-08 17:52:57 -05003259
3260 /*
3261 * We were waiting for group to get backlogged. Expire the queue
3262 */
3263 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
3264 goto expire;
3265
Jens Axboe22e2c502005-06-27 10:55:12 +02003266 /*
Jens Axboe6d048f52007-04-25 12:44:27 +02003267 * The active queue has run out of time, expire it and select new.
Jens Axboe22e2c502005-06-27 10:55:12 +02003268 */
Vivek Goyal7667aa02009-12-08 17:52:58 -05003269 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
3270 /*
3271 * If slice had not expired at the completion of last request
3272 * we might not have turned on wait_busy flag. Don't expire
3273 * the queue yet. Allow the group to get backlogged.
3274 *
3275 * The very fact that we have used the slice, that means we
3276 * have been idling all along on this queue and it should be
3277 * ok to wait for this request to complete.
3278 */
Vivek Goyal82bbbf22009-12-10 19:25:41 +01003279 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
3280 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
3281 cfqq = NULL;
Vivek Goyal7667aa02009-12-08 17:52:58 -05003282 goto keep_queue;
Vivek Goyal82bbbf22009-12-10 19:25:41 +01003283 } else
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02003284 goto check_group_idle;
Vivek Goyal7667aa02009-12-08 17:52:58 -05003285 }
Jens Axboe22e2c502005-06-27 10:55:12 +02003286
3287 /*
Jens Axboe6d048f52007-04-25 12:44:27 +02003288 * The active queue has requests and isn't expired, allow it to
3289 * dispatch.
Jens Axboe22e2c502005-06-27 10:55:12 +02003290 */
Jens Axboedd67d052006-06-21 09:36:18 +02003291 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
Jens Axboe22e2c502005-06-27 10:55:12 +02003292 goto keep_queue;
Jens Axboe6d048f52007-04-25 12:44:27 +02003293
3294 /*
Jens Axboea36e71f2009-04-15 12:15:11 +02003295 * If another queue has a request waiting within our mean seek
3296 * distance, let it run. The expire code will check for close
3297 * cooperators and put the close queue at the front of the service
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003298 * tree. If possible, merge the expiring queue with the new cfqq.
Jens Axboea36e71f2009-04-15 12:15:11 +02003299 */
Jeff Moyerb3b6d042009-10-23 17:14:51 -04003300 new_cfqq = cfq_close_cooperator(cfqd, cfqq);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003301 if (new_cfqq) {
3302 if (!cfqq->new_cfqq)
3303 cfq_setup_merge(cfqq, new_cfqq);
Jens Axboea36e71f2009-04-15 12:15:11 +02003304 goto expire;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003305 }
Jens Axboea36e71f2009-04-15 12:15:11 +02003306
3307 /*
Jens Axboe6d048f52007-04-25 12:44:27 +02003308 * No requests pending. If the active queue still has requests in
3309 * flight or is idling for a new request, allow either of these
3310 * conditions to happen (or time out) before selecting a new queue.
3311 */
Jan Kara91148322016-06-08 15:11:39 +02003312 if (hrtimer_active(&cfqd->idle_slice_timer)) {
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02003313 cfqq = NULL;
3314 goto keep_queue;
3315 }
3316
Shaohua Li8e1ac662010-11-08 15:01:04 +01003317 /*
3318 * This is a deep seek queue, but the device is much faster than
3319 * the queue can deliver, don't idle
3320 **/
3321 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
3322 (cfq_cfqq_slice_new(cfqq) ||
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003323 (cfqq->slice_end - now > now - cfqq->slice_start))) {
Shaohua Li8e1ac662010-11-08 15:01:04 +01003324 cfq_clear_cfqq_deep(cfqq);
3325 cfq_clear_cfqq_idle_window(cfqq);
3326 }
3327
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02003328 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
3329 cfqq = NULL;
3330 goto keep_queue;
3331 }
3332
3333 /*
3334 * If group idle is enabled and there are requests dispatched from
3335 * this group, wait for requests to complete.
3336 */
3337check_group_idle:
Shaohua Li7700fc42011-07-12 14:24:56 +02003338 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
3339 cfqq->cfqg->dispatched &&
3340 !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
Jens Axboecaaa5f92006-06-16 11:23:00 +02003341 cfqq = NULL;
3342 goto keep_queue;
Jens Axboe22e2c502005-06-27 10:55:12 +02003343 }
3344
Jens Axboe3b181522005-06-27 10:56:24 +02003345expire:
Vivek Goyale5ff0822010-04-26 19:25:11 +02003346 cfq_slice_expired(cfqd, 0);
Jens Axboe3b181522005-06-27 10:56:24 +02003347new_queue:
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003348 /*
3349 * Current queue expired. Check if we have to switch to a new
3350 * service tree
3351 */
3352 if (!new_cfqq)
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003353 cfq_choose_cfqg(cfqd);
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003354
Jens Axboea36e71f2009-04-15 12:15:11 +02003355 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02003356keep_queue:
Jens Axboe3b181522005-06-27 10:56:24 +02003357 return cfqq;
Jens Axboe22e2c502005-06-27 10:55:12 +02003358}
3359
Jens Axboefebffd62008-01-28 13:19:43 +01003360static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
Jens Axboed9e76202007-04-20 14:27:50 +02003361{
3362 int dispatched = 0;
3363
3364 while (cfqq->next_rq) {
3365 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
3366 dispatched++;
3367 }
3368
3369 BUG_ON(!list_empty(&cfqq->fifo));
Vivek Goyalf04a6422009-12-03 12:59:40 -05003370
3371 /* By default cfqq is not expired if it is empty. Do it explicitly */
Vivek Goyale5ff0822010-04-26 19:25:11 +02003372 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
Jens Axboed9e76202007-04-20 14:27:50 +02003373 return dispatched;
3374}
3375
Jens Axboe498d3aa22007-04-26 12:54:48 +02003376/*
3377 * Drain our current requests. Used for barriers and when switching
3378 * io schedulers on-the-fly.
3379 */
Jens Axboed9e76202007-04-20 14:27:50 +02003380static int cfq_forced_dispatch(struct cfq_data *cfqd)
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003381{
Jens Axboe08717142008-01-28 11:38:15 +01003382 struct cfq_queue *cfqq;
Jens Axboed9e76202007-04-20 14:27:50 +02003383 int dispatched = 0;
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003384
Divyesh Shah3440c492010-04-09 09:29:57 +02003385 /* Expire the timeslice of the current active queue first */
Vivek Goyale5ff0822010-04-26 19:25:11 +02003386 cfq_slice_expired(cfqd, 0);
Divyesh Shah3440c492010-04-09 09:29:57 +02003387 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
3388 __cfq_set_active_queue(cfqd, cfqq);
Vivek Goyalf04a6422009-12-03 12:59:40 -05003389 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
Divyesh Shah3440c492010-04-09 09:29:57 +02003390 }
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003391
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003392 BUG_ON(cfqd->busy_queues);
3393
Jeff Moyer6923715a2009-06-12 15:29:30 +02003394 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003395 return dispatched;
3396}
3397
Shaohua Liabc3c742010-03-01 09:20:54 +01003398static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
3399 struct cfq_queue *cfqq)
3400{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003401 u64 now = ktime_get_ns();
3402
Shaohua Liabc3c742010-03-01 09:20:54 +01003403 /* the queue hasn't finished any request, can't estimate */
3404 if (cfq_cfqq_slice_new(cfqq))
Shaohua Lic1e44752010-11-08 15:01:02 +01003405 return true;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003406 if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
Shaohua Lic1e44752010-11-08 15:01:02 +01003407 return true;
Shaohua Liabc3c742010-03-01 09:20:54 +01003408
Shaohua Lic1e44752010-11-08 15:01:02 +01003409 return false;
Shaohua Liabc3c742010-03-01 09:20:54 +01003410}
3411
Jens Axboe0b182d62009-10-06 20:49:37 +02003412static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Jens Axboe2f5cb732009-04-07 08:51:19 +02003413{
Jens Axboe2f5cb732009-04-07 08:51:19 +02003414 unsigned int max_dispatch;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003415
Glauber Costa3932a862016-09-22 20:59:59 -04003416 if (cfq_cfqq_must_dispatch(cfqq))
3417 return true;
3418
Jens Axboe2f5cb732009-04-07 08:51:19 +02003419 /*
Jens Axboe5ad531d2009-07-03 12:57:48 +02003420 * Drain async requests before we start sync IO
3421 */
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01003422 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
Jens Axboe0b182d62009-10-06 20:49:37 +02003423 return false;
Jens Axboe5ad531d2009-07-03 12:57:48 +02003424
3425 /*
Jens Axboe2f5cb732009-04-07 08:51:19 +02003426 * If this is an async queue and we have sync IO in flight, let it wait
3427 */
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01003428 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
Jens Axboe0b182d62009-10-06 20:49:37 +02003429 return false;
Jens Axboe2f5cb732009-04-07 08:51:19 +02003430
Shaohua Liabc3c742010-03-01 09:20:54 +01003431 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
Jens Axboe2f5cb732009-04-07 08:51:19 +02003432 if (cfq_class_idle(cfqq))
3433 max_dispatch = 1;
3434
3435 /*
3436 * Does this cfqq already have too much IO in flight?
3437 */
3438 if (cfqq->dispatched >= max_dispatch) {
Shaohua Lief8a41d2011-03-07 09:26:29 +01003439 bool promote_sync = false;
Jens Axboe2f5cb732009-04-07 08:51:19 +02003440 /*
3441 * idle queue must always only have a single IO in flight
3442 */
Jens Axboe3ed9a292007-04-23 08:33:33 +02003443 if (cfq_class_idle(cfqq))
Jens Axboe0b182d62009-10-06 20:49:37 +02003444 return false;
Jens Axboe3ed9a292007-04-23 08:33:33 +02003445
Jens Axboe2f5cb732009-04-07 08:51:19 +02003446 /*
Li, Shaohuac4ade942011-03-23 08:30:34 +01003447 * If there is only one sync queue
3448 * we can ignore async queue here and give the sync
Shaohua Lief8a41d2011-03-07 09:26:29 +01003449 * queue no dispatch limit. The reason is a sync queue can
3450 * preempt async queue, limiting the sync queue doesn't make
3451 * sense. This is useful for aiostress test.
3452 */
Li, Shaohuac4ade942011-03-23 08:30:34 +01003453 if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
3454 promote_sync = true;
Shaohua Lief8a41d2011-03-07 09:26:29 +01003455
3456 /*
Jens Axboe2f5cb732009-04-07 08:51:19 +02003457 * We have other queues, don't allow more IO from this one
3458 */
Shaohua Lief8a41d2011-03-07 09:26:29 +01003459 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
3460 !promote_sync)
Jens Axboe0b182d62009-10-06 20:49:37 +02003461 return false;
Jens Axboe9ede2092007-01-19 12:11:44 +11003462
Jens Axboe2f5cb732009-04-07 08:51:19 +02003463 /*
Shaohua Li474b18c2009-12-03 12:58:05 +01003464 * Sole queue user, no limit
Vivek Goyal365722b2009-10-03 15:21:27 +02003465 */
Shaohua Lief8a41d2011-03-07 09:26:29 +01003466 if (cfqd->busy_queues == 1 || promote_sync)
Shaohua Liabc3c742010-03-01 09:20:54 +01003467 max_dispatch = -1;
3468 else
3469 /*
3470 * Normally we start throttling cfqq when cfq_quantum/2
3471 * requests have been dispatched. But we can drive
3472 * deeper queue depths at the beginning of slice
3473 * subjected to upper limit of cfq_quantum.
3474 * */
3475 max_dispatch = cfqd->cfq_quantum;
Jens Axboe8e296752009-10-03 16:26:03 +02003476 }
3477
3478 /*
3479 * Async queues must wait a bit before being allowed dispatch.
3480 * We also ramp up the dispatch depth gradually for async IO,
3481 * based on the last sync IO we serviced
3482 */
Jens Axboe963b72f2009-10-03 19:42:18 +02003483 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003484 u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
Jens Axboe8e296752009-10-03 16:26:03 +02003485 unsigned int depth;
Vivek Goyal365722b2009-10-03 15:21:27 +02003486
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003487 depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
Jens Axboee00c54c2009-10-04 20:36:19 +02003488 if (!depth && !cfqq->dispatched)
3489 depth = 1;
Jens Axboe8e296752009-10-03 16:26:03 +02003490 if (depth < max_dispatch)
3491 max_dispatch = depth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003492 }
3493
Jens Axboe0b182d62009-10-06 20:49:37 +02003494 /*
3495 * If we're below the current max, allow a dispatch
3496 */
3497 return cfqq->dispatched < max_dispatch;
3498}
3499
3500/*
3501 * Dispatch a request from cfqq, moving them to the request queue
3502 * dispatch list.
3503 */
3504static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3505{
3506 struct request *rq;
3507
3508 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
3509
Glauber Costa3932a862016-09-22 20:59:59 -04003510 rq = cfq_check_fifo(cfqq);
3511 if (rq)
3512 cfq_mark_cfqq_must_dispatch(cfqq);
3513
Jens Axboe0b182d62009-10-06 20:49:37 +02003514 if (!cfq_may_dispatch(cfqd, cfqq))
3515 return false;
3516
3517 /*
3518 * follow expired path, else get first next available
3519 */
Jens Axboe0b182d62009-10-06 20:49:37 +02003520 if (!rq)
3521 rq = cfqq->next_rq;
Glauber Costa3932a862016-09-22 20:59:59 -04003522 else
3523 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
Jens Axboe0b182d62009-10-06 20:49:37 +02003524
3525 /*
3526 * insert request into driver dispatch list
3527 */
3528 cfq_dispatch_insert(cfqd->queue, rq);
3529
3530 if (!cfqd->active_cic) {
Tejun Heoc5869802011-12-14 00:33:41 +01003531 struct cfq_io_cq *cic = RQ_CIC(rq);
Jens Axboe0b182d62009-10-06 20:49:37 +02003532
Tejun Heoc5869802011-12-14 00:33:41 +01003533 atomic_long_inc(&cic->icq.ioc->refcount);
Jens Axboe0b182d62009-10-06 20:49:37 +02003534 cfqd->active_cic = cic;
3535 }
3536
3537 return true;
3538}
3539
3540/*
3541 * Find the cfqq that we need to service and move a request from that to the
3542 * dispatch list
3543 */
3544static int cfq_dispatch_requests(struct request_queue *q, int force)
3545{
3546 struct cfq_data *cfqd = q->elevator->elevator_data;
3547 struct cfq_queue *cfqq;
3548
3549 if (!cfqd->busy_queues)
3550 return 0;
3551
3552 if (unlikely(force))
3553 return cfq_forced_dispatch(cfqd);
3554
3555 cfqq = cfq_select_queue(cfqd);
3556 if (!cfqq)
Jens Axboe8e296752009-10-03 16:26:03 +02003557 return 0;
3558
Jens Axboe2f5cb732009-04-07 08:51:19 +02003559 /*
Jens Axboe0b182d62009-10-06 20:49:37 +02003560 * Dispatch a request from this cfqq, if it is allowed
Jens Axboe2f5cb732009-04-07 08:51:19 +02003561 */
Jens Axboe0b182d62009-10-06 20:49:37 +02003562 if (!cfq_dispatch_request(cfqd, cfqq))
3563 return 0;
3564
Jens Axboe2f5cb732009-04-07 08:51:19 +02003565 cfqq->slice_dispatch++;
Jens Axboeb0291952009-04-07 11:38:31 +02003566 cfq_clear_cfqq_must_dispatch(cfqq);
Jens Axboe2f5cb732009-04-07 08:51:19 +02003567
3568 /*
3569 * expire an async queue immediately if it has used up its slice. idle
3570 * queue always expire after 1 dispatch round.
3571 */
3572 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
3573 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
3574 cfq_class_idle(cfqq))) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003575 cfqq->slice_end = ktime_get_ns() + 1;
Vivek Goyale5ff0822010-04-26 19:25:11 +02003576 cfq_slice_expired(cfqd, 0);
Jens Axboe2f5cb732009-04-07 08:51:19 +02003577 }
3578
Shan Weib217a902009-09-01 10:06:42 +02003579 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
Jens Axboe2f5cb732009-04-07 08:51:19 +02003580 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003581}
3582
Linus Torvalds1da177e2005-04-16 15:20:36 -07003583/*
Jens Axboe5e705372006-07-13 12:39:25 +02003584 * task holds one reference to the queue, dropped when task exits. each rq
3585 * in-flight on this queue also holds a reference, dropped when rq is freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003586 *
Vivek Goyalb1c35762009-12-03 12:59:47 -05003587 * Each cfq queue took a reference on the parent group. Drop it now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003588 * queue lock must be held here.
3589 */
3590static void cfq_put_queue(struct cfq_queue *cfqq)
3591{
Jens Axboe22e2c502005-06-27 10:55:12 +02003592 struct cfq_data *cfqd = cfqq->cfqd;
Justin TerAvest0bbfeb82011-03-01 15:05:08 -05003593 struct cfq_group *cfqg;
Jens Axboe22e2c502005-06-27 10:55:12 +02003594
Shaohua Li30d7b942011-01-07 08:46:59 +01003595 BUG_ON(cfqq->ref <= 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003596
Shaohua Li30d7b942011-01-07 08:46:59 +01003597 cfqq->ref--;
3598 if (cfqq->ref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003599 return;
3600
Jens Axboe7b679132008-05-30 12:23:07 +02003601 cfq_log_cfqq(cfqd, cfqq, "put_queue");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602 BUG_ON(rb_first(&cfqq->sort_list));
Jens Axboe22e2c502005-06-27 10:55:12 +02003603 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
Vivek Goyalb1c35762009-12-03 12:59:47 -05003604 cfqg = cfqq->cfqg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003605
Jens Axboe28f95cbc2007-01-19 12:09:53 +11003606 if (unlikely(cfqd->active_queue == cfqq)) {
Vivek Goyale5ff0822010-04-26 19:25:11 +02003607 __cfq_slice_expired(cfqd, cfqq, 0);
Jens Axboe23e018a2009-10-05 08:52:35 +02003608 cfq_schedule_dispatch(cfqd);
Jens Axboe28f95cbc2007-01-19 12:09:53 +11003609 }
Jens Axboe22e2c502005-06-27 10:55:12 +02003610
Vivek Goyalf04a6422009-12-03 12:59:40 -05003611 BUG_ON(cfq_cfqq_on_rr(cfqq));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003612 kmem_cache_free(cfq_pool, cfqq);
Tejun Heoeb7d8c072012-03-23 14:02:53 +01003613 cfqg_put(cfqg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003614}
3615
Shaohua Lid02a2c02010-05-25 10:16:53 +02003616static void cfq_put_cooperator(struct cfq_queue *cfqq)
Jens Axboe89850f72006-07-22 16:48:31 +02003617{
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003618 struct cfq_queue *__cfqq, *next;
3619
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003620 /*
3621 * If this queue was scheduled to merge with another queue, be
3622 * sure to drop the reference taken on that queue (and others in
3623 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
3624 */
3625 __cfqq = cfqq->new_cfqq;
3626 while (__cfqq) {
3627 if (__cfqq == cfqq) {
3628 WARN(1, "cfqq->new_cfqq loop detected\n");
3629 break;
3630 }
3631 next = __cfqq->new_cfqq;
3632 cfq_put_queue(__cfqq);
3633 __cfqq = next;
3634 }
Shaohua Lid02a2c02010-05-25 10:16:53 +02003635}
3636
3637static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3638{
3639 if (unlikely(cfqq == cfqd->active_queue)) {
3640 __cfq_slice_expired(cfqd, cfqq, 0);
3641 cfq_schedule_dispatch(cfqd);
3642 }
3643
3644 cfq_put_cooperator(cfqq);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003645
Jens Axboe89850f72006-07-22 16:48:31 +02003646 cfq_put_queue(cfqq);
3647}
3648
Tejun Heo9b84cac2011-12-14 00:33:42 +01003649static void cfq_init_icq(struct io_cq *icq)
3650{
3651 struct cfq_io_cq *cic = icq_to_cic(icq);
3652
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003653 cic->ttime.last_end_request = ktime_get_ns();
Tejun Heo9b84cac2011-12-14 00:33:42 +01003654}
3655
Tejun Heoc5869802011-12-14 00:33:41 +01003656static void cfq_exit_icq(struct io_cq *icq)
Jens Axboe89850f72006-07-22 16:48:31 +02003657{
Tejun Heoc5869802011-12-14 00:33:41 +01003658 struct cfq_io_cq *cic = icq_to_cic(icq);
Tejun Heo283287a2011-12-14 00:33:38 +01003659 struct cfq_data *cfqd = cic_to_cfqd(cic);
Fabio Checconi4faa3c82008-04-10 08:28:01 +02003660
Tejun Heo563180a2015-08-18 14:55:00 -07003661 if (cic_to_cfqq(cic, false)) {
3662 cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, false));
3663 cic_set_cfqq(cic, NULL, false);
Jens Axboe89850f72006-07-22 16:48:31 +02003664 }
3665
Tejun Heo563180a2015-08-18 14:55:00 -07003666 if (cic_to_cfqq(cic, true)) {
3667 cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, true));
3668 cic_set_cfqq(cic, NULL, true);
Jens Axboe89850f72006-07-22 16:48:31 +02003669 }
Jens Axboe89850f72006-07-22 16:48:31 +02003670}
3671
Tejun Heoabede6d2012-03-19 15:10:57 -07003672static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
Jens Axboe22e2c502005-06-27 10:55:12 +02003673{
3674 struct task_struct *tsk = current;
3675 int ioprio_class;
3676
Jens Axboe3b181522005-06-27 10:56:24 +02003677 if (!cfq_cfqq_prio_changed(cfqq))
Jens Axboe22e2c502005-06-27 10:55:12 +02003678 return;
3679
Tejun Heo598971b2012-03-19 15:10:58 -07003680 ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
Jens Axboe22e2c502005-06-27 10:55:12 +02003681 switch (ioprio_class) {
Jens Axboefe094d92008-01-31 13:08:54 +01003682 default:
3683 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
3684 case IOPRIO_CLASS_NONE:
3685 /*
Jens Axboe6d63c272008-05-07 09:51:23 +02003686 * no prio set, inherit CPU scheduling settings
Jens Axboefe094d92008-01-31 13:08:54 +01003687 */
3688 cfqq->ioprio = task_nice_ioprio(tsk);
Jens Axboe6d63c272008-05-07 09:51:23 +02003689 cfqq->ioprio_class = task_nice_ioclass(tsk);
Jens Axboefe094d92008-01-31 13:08:54 +01003690 break;
3691 case IOPRIO_CLASS_RT:
Tejun Heo598971b2012-03-19 15:10:58 -07003692 cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
Jens Axboefe094d92008-01-31 13:08:54 +01003693 cfqq->ioprio_class = IOPRIO_CLASS_RT;
3694 break;
3695 case IOPRIO_CLASS_BE:
Tejun Heo598971b2012-03-19 15:10:58 -07003696 cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
Jens Axboefe094d92008-01-31 13:08:54 +01003697 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3698 break;
3699 case IOPRIO_CLASS_IDLE:
3700 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
3701 cfqq->ioprio = 7;
3702 cfq_clear_cfqq_idle_window(cfqq);
3703 break;
Jens Axboe22e2c502005-06-27 10:55:12 +02003704 }
3705
3706 /*
3707 * keep track of original prio settings in case we have to temporarily
3708 * elevate the priority of this queue
3709 */
3710 cfqq->org_ioprio = cfqq->ioprio;
Jens Axboeb8269db2016-06-09 15:47:29 -06003711 cfqq->org_ioprio_class = cfqq->ioprio_class;
Jens Axboe3b181522005-06-27 10:56:24 +02003712 cfq_clear_cfqq_prio_changed(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02003713}
3714
Tejun Heo598971b2012-03-19 15:10:58 -07003715static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
Jens Axboe22e2c502005-06-27 10:55:12 +02003716{
Tejun Heo598971b2012-03-19 15:10:58 -07003717 int ioprio = cic->icq.ioc->ioprio;
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +04003718 struct cfq_data *cfqd = cic_to_cfqd(cic);
Al Viro478a82b2006-03-18 13:25:24 -05003719 struct cfq_queue *cfqq;
Jens Axboe35e60772006-06-14 09:10:45 +02003720
Tejun Heo598971b2012-03-19 15:10:58 -07003721 /*
3722 * Check whether ioprio has changed. The condition may trigger
3723 * spuriously on a newly created cic but there's no harm.
3724 */
3725 if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
Jens Axboecaaa5f92006-06-16 11:23:00 +02003726 return;
3727
Tejun Heo563180a2015-08-18 14:55:00 -07003728 cfqq = cic_to_cfqq(cic, false);
Jens Axboecaaa5f92006-06-16 11:23:00 +02003729 if (cfqq) {
Tejun Heo563180a2015-08-18 14:55:00 -07003730 cfq_put_queue(cfqq);
Tejun Heo2da8de02015-08-18 14:55:02 -07003731 cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio);
Tejun Heo563180a2015-08-18 14:55:00 -07003732 cic_set_cfqq(cic, cfqq, false);
Jens Axboe22e2c502005-06-27 10:55:12 +02003733 }
Jens Axboecaaa5f92006-06-16 11:23:00 +02003734
Tejun Heo563180a2015-08-18 14:55:00 -07003735 cfqq = cic_to_cfqq(cic, true);
Jens Axboecaaa5f92006-06-16 11:23:00 +02003736 if (cfqq)
3737 cfq_mark_cfqq_prio_changed(cfqq);
Tejun Heo598971b2012-03-19 15:10:58 -07003738
3739 cic->ioprio = ioprio;
Jens Axboe22e2c502005-06-27 10:55:12 +02003740}
3741
Jens Axboed5036d72009-06-26 10:44:34 +02003742static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Jens Axboea6151c32009-10-07 20:02:57 +02003743 pid_t pid, bool is_sync)
Jens Axboed5036d72009-06-26 10:44:34 +02003744{
3745 RB_CLEAR_NODE(&cfqq->rb_node);
3746 RB_CLEAR_NODE(&cfqq->p_node);
3747 INIT_LIST_HEAD(&cfqq->fifo);
3748
Shaohua Li30d7b942011-01-07 08:46:59 +01003749 cfqq->ref = 0;
Jens Axboed5036d72009-06-26 10:44:34 +02003750 cfqq->cfqd = cfqd;
3751
3752 cfq_mark_cfqq_prio_changed(cfqq);
3753
3754 if (is_sync) {
3755 if (!cfq_class_idle(cfqq))
3756 cfq_mark_cfqq_idle_window(cfqq);
3757 cfq_mark_cfqq_sync(cfqq);
3758 }
3759 cfqq->pid = pid;
3760}
3761
Vivek Goyal246103332009-12-03 12:59:51 -05003762#ifdef CONFIG_CFQ_GROUP_IOSCHED
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07003763static bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
Vivek Goyal246103332009-12-03 12:59:51 -05003764{
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +04003765 struct cfq_data *cfqd = cic_to_cfqd(cic);
Tejun Heo60a83702015-08-18 14:55:05 -07003766 struct cfq_queue *cfqq;
Tejun Heof4da8072014-09-08 08:15:20 +09003767 uint64_t serial_nr;
Jens Axboe87760e52016-11-09 12:38:14 -07003768 bool nonroot_cg;
Vivek Goyal246103332009-12-03 12:59:51 -05003769
Tejun Heo598971b2012-03-19 15:10:58 -07003770 rcu_read_lock();
Tejun Heof4da8072014-09-08 08:15:20 +09003771 serial_nr = bio_blkcg(bio)->css.serial_nr;
Jens Axboe87760e52016-11-09 12:38:14 -07003772 nonroot_cg = bio_blkcg(bio) != &blkcg_root;
Tejun Heo598971b2012-03-19 15:10:58 -07003773 rcu_read_unlock();
3774
3775 /*
3776 * Check whether blkcg has changed. The condition may trigger
3777 * spuriously on a newly created cic but there's no harm.
3778 */
Tejun Heof4da8072014-09-08 08:15:20 +09003779 if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07003780 return nonroot_cg;
Jens Axboe87760e52016-11-09 12:38:14 -07003781
3782 /*
Tejun Heo60a83702015-08-18 14:55:05 -07003783 * Drop reference to queues. New queues will be assigned in new
3784 * group upon arrival of fresh requests.
3785 */
3786 cfqq = cic_to_cfqq(cic, false);
3787 if (cfqq) {
3788 cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
3789 cic_set_cfqq(cic, NULL, false);
3790 cfq_put_queue(cfqq);
3791 }
3792
3793 cfqq = cic_to_cfqq(cic, true);
3794 if (cfqq) {
3795 cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
3796 cic_set_cfqq(cic, NULL, true);
3797 cfq_put_queue(cfqq);
Vivek Goyal246103332009-12-03 12:59:51 -05003798 }
Tejun Heo598971b2012-03-19 15:10:58 -07003799
Tejun Heof4da8072014-09-08 08:15:20 +09003800 cic->blkcg_serial_nr = serial_nr;
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07003801 return nonroot_cg;
Vivek Goyal246103332009-12-03 12:59:51 -05003802}
Tejun Heo598971b2012-03-19 15:10:58 -07003803#else
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07003804static inline bool check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3805{
3806 return false;
3807}
Vivek Goyal246103332009-12-03 12:59:51 -05003808#endif /* CONFIG_CFQ_GROUP_IOSCHED */
3809
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003810static struct cfq_queue **
Tejun Heo60a83702015-08-18 14:55:05 -07003811cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003812{
Jens Axboefe094d92008-01-31 13:08:54 +01003813 switch (ioprio_class) {
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003814 case IOPRIO_CLASS_RT:
Tejun Heo60a83702015-08-18 14:55:05 -07003815 return &cfqg->async_cfqq[0][ioprio];
Tejun Heo598971b2012-03-19 15:10:58 -07003816 case IOPRIO_CLASS_NONE:
3817 ioprio = IOPRIO_NORM;
3818 /* fall through */
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003819 case IOPRIO_CLASS_BE:
Tejun Heo60a83702015-08-18 14:55:05 -07003820 return &cfqg->async_cfqq[1][ioprio];
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003821 case IOPRIO_CLASS_IDLE:
Tejun Heo60a83702015-08-18 14:55:05 -07003822 return &cfqg->async_idle_cfqq;
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003823 default:
3824 BUG();
3825 }
3826}
3827
Jens Axboe15c31be2007-07-10 13:43:25 +02003828static struct cfq_queue *
Tejun Heoabede6d2012-03-19 15:10:57 -07003829cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
Tejun Heo2da8de02015-08-18 14:55:02 -07003830 struct bio *bio)
Jens Axboe15c31be2007-07-10 13:43:25 +02003831{
Jeff Moyerc6ce1942015-01-12 15:21:01 -05003832 int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
3833 int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
Tejun Heod4aad7f2015-08-18 14:55:04 -07003834 struct cfq_queue **async_cfqq = NULL;
Tejun Heo4ebc1c62015-08-18 14:54:57 -07003835 struct cfq_queue *cfqq;
Tejun Heo322731e2015-08-18 14:55:03 -07003836 struct cfq_group *cfqg;
3837
3838 rcu_read_lock();
Tejun Heoae118892015-08-18 14:55:20 -07003839 cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
Tejun Heo322731e2015-08-18 14:55:03 -07003840 if (!cfqg) {
3841 cfqq = &cfqd->oom_cfqq;
3842 goto out;
3843 }
Jens Axboe15c31be2007-07-10 13:43:25 +02003844
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003845 if (!is_sync) {
Jeff Moyerc6ce1942015-01-12 15:21:01 -05003846 if (!ioprio_valid(cic->ioprio)) {
3847 struct task_struct *tsk = current;
3848 ioprio = task_nice_ioprio(tsk);
3849 ioprio_class = task_nice_ioclass(tsk);
3850 }
Tejun Heo60a83702015-08-18 14:55:05 -07003851 async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003852 cfqq = *async_cfqq;
Tejun Heo4ebc1c62015-08-18 14:54:57 -07003853 if (cfqq)
3854 goto out;
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003855 }
3856
Tejun Heoe00f4f42016-11-21 18:03:32 -05003857 cfqq = kmem_cache_alloc_node(cfq_pool,
3858 GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
Tejun Heod4aad7f2015-08-18 14:55:04 -07003859 cfqd->queue->node);
3860 if (!cfqq) {
3861 cfqq = &cfqd->oom_cfqq;
3862 goto out;
3863 }
Jens Axboe15c31be2007-07-10 13:43:25 +02003864
Alexander Potapenko4d608ba2017-01-23 15:06:43 +01003865 /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
3866 cfqq->ioprio_class = IOPRIO_CLASS_NONE;
Tejun Heod4aad7f2015-08-18 14:55:04 -07003867 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
3868 cfq_init_prio_data(cfqq, cic);
3869 cfq_link_cfqq_cfqg(cfqq, cfqg);
3870 cfq_log_cfqq(cfqd, cfqq, "alloced");
3871
3872 if (async_cfqq) {
3873 /* a new async queue is created, pin and remember */
Shaohua Li30d7b942011-01-07 08:46:59 +01003874 cfqq->ref++;
Vasily Tarasovc2dea2d2007-07-20 10:06:38 +02003875 *async_cfqq = cfqq;
Jens Axboe15c31be2007-07-10 13:43:25 +02003876 }
Tejun Heo4ebc1c62015-08-18 14:54:57 -07003877out:
Shaohua Li30d7b942011-01-07 08:46:59 +01003878 cfqq->ref++;
Tejun Heo322731e2015-08-18 14:55:03 -07003879 rcu_read_unlock();
Jens Axboe15c31be2007-07-10 13:43:25 +02003880 return cfqq;
3881}
3882
Jens Axboe22e2c502005-06-27 10:55:12 +02003883static void
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003884__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
Jens Axboe22e2c502005-06-27 10:55:12 +02003885{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003886 u64 elapsed = ktime_get_ns() - ttime->last_end_request;
Shaohua Li383cd722011-07-12 14:24:35 +02003887 elapsed = min(elapsed, 2UL * slice_idle);
Jens Axboe22e2c502005-06-27 10:55:12 +02003888
Shaohua Li383cd722011-07-12 14:24:35 +02003889 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003890 ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
3891 ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
3892 ttime->ttime_samples);
Shaohua Li383cd722011-07-12 14:24:35 +02003893}
3894
3895static void
3896cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Tejun Heoc5869802011-12-14 00:33:41 +01003897 struct cfq_io_cq *cic)
Shaohua Li383cd722011-07-12 14:24:35 +02003898{
Shaohua Lif5f2b6c2011-07-12 14:24:55 +02003899 if (cfq_cfqq_sync(cfqq)) {
Shaohua Li383cd722011-07-12 14:24:35 +02003900 __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
Shaohua Lif5f2b6c2011-07-12 14:24:55 +02003901 __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
3902 cfqd->cfq_slice_idle);
3903 }
Shaohua Li7700fc42011-07-12 14:24:56 +02003904#ifdef CONFIG_CFQ_GROUP_IOSCHED
3905 __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
3906#endif
Jens Axboe22e2c502005-06-27 10:55:12 +02003907}
3908
Jens Axboe206dc692006-03-28 13:03:44 +02003909static void
Jeff Moyerb2c18e12009-10-23 17:14:49 -04003910cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Jens Axboe6d048f52007-04-25 12:44:27 +02003911 struct request *rq)
Jens Axboe206dc692006-03-28 13:03:44 +02003912{
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003913 sector_t sdist = 0;
Corrado Zoccolo41647e72010-02-27 19:45:40 +01003914 sector_t n_sec = blk_rq_sectors(rq);
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003915 if (cfqq->last_request_pos) {
3916 if (cfqq->last_request_pos < blk_rq_pos(rq))
3917 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
3918 else
3919 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
3920 }
Jens Axboe206dc692006-03-28 13:03:44 +02003921
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003922 cfqq->seek_history <<= 1;
Corrado Zoccolo41647e72010-02-27 19:45:40 +01003923 if (blk_queue_nonrot(cfqd->queue))
3924 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
3925 else
3926 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
Jens Axboe206dc692006-03-28 13:03:44 +02003927}
Jens Axboe22e2c502005-06-27 10:55:12 +02003928
Christoph Hellwiga2b80962016-11-01 07:40:09 -06003929static inline bool req_noidle(struct request *req)
3930{
3931 return req_op(req) == REQ_OP_WRITE &&
3932 (req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC;
3933}
3934
Jens Axboe22e2c502005-06-27 10:55:12 +02003935/*
3936 * Disable idle window if the process thinks too long or seeks so much that
3937 * it doesn't matter
3938 */
3939static void
3940cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Tejun Heoc5869802011-12-14 00:33:41 +01003941 struct cfq_io_cq *cic)
Jens Axboe22e2c502005-06-27 10:55:12 +02003942{
Jens Axboe7b679132008-05-30 12:23:07 +02003943 int old_idle, enable_idle;
Jens Axboe1be92f2f2007-04-19 14:32:26 +02003944
Jens Axboe08717142008-01-28 11:38:15 +01003945 /*
3946 * Don't idle for async or idle io prio class
3947 */
3948 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
Jens Axboe1be92f2f2007-04-19 14:32:26 +02003949 return;
3950
Jens Axboec265a7f2008-06-26 13:49:33 +02003951 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02003952
Corrado Zoccolo76280af2009-11-26 10:02:58 +01003953 if (cfqq->queued[0] + cfqq->queued[1] >= 4)
3954 cfq_mark_cfqq_deep(cfqq);
3955
Christoph Hellwiga2b80962016-11-01 07:40:09 -06003956 if (cfqq->next_rq && req_noidle(cfqq->next_rq))
Corrado Zoccolo749ef9f2010-09-20 15:24:50 +02003957 enable_idle = 0;
Tejun Heof6e8d012012-03-05 13:15:26 -08003958 else if (!atomic_read(&cic->icq.ioc->active_ref) ||
Tejun Heoc5869802011-12-14 00:33:41 +01003959 !cfqd->cfq_slice_idle ||
3960 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
Jens Axboe22e2c502005-06-27 10:55:12 +02003961 enable_idle = 0;
Shaohua Li383cd722011-07-12 14:24:35 +02003962 else if (sample_valid(cic->ttime.ttime_samples)) {
3963 if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
Jens Axboe22e2c502005-06-27 10:55:12 +02003964 enable_idle = 0;
3965 else
3966 enable_idle = 1;
3967 }
3968
Jens Axboe7b679132008-05-30 12:23:07 +02003969 if (old_idle != enable_idle) {
3970 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
3971 if (enable_idle)
3972 cfq_mark_cfqq_idle_window(cfqq);
3973 else
3974 cfq_clear_cfqq_idle_window(cfqq);
3975 }
Jens Axboe22e2c502005-06-27 10:55:12 +02003976}
3977
Jens Axboe22e2c502005-06-27 10:55:12 +02003978/*
3979 * Check if new_cfqq should preempt the currently active queue. Return 0 for
3980 * no or if we aren't sure, a 1 will cause a preempt.
3981 */
Jens Axboea6151c32009-10-07 20:02:57 +02003982static bool
Jens Axboe22e2c502005-06-27 10:55:12 +02003983cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
Jens Axboe5e705372006-07-13 12:39:25 +02003984 struct request *rq)
Jens Axboe22e2c502005-06-27 10:55:12 +02003985{
Jens Axboe6d048f52007-04-25 12:44:27 +02003986 struct cfq_queue *cfqq;
Jens Axboe22e2c502005-06-27 10:55:12 +02003987
Jens Axboe6d048f52007-04-25 12:44:27 +02003988 cfqq = cfqd->active_queue;
3989 if (!cfqq)
Jens Axboea6151c32009-10-07 20:02:57 +02003990 return false;
Jens Axboe22e2c502005-06-27 10:55:12 +02003991
Jens Axboe6d048f52007-04-25 12:44:27 +02003992 if (cfq_class_idle(new_cfqq))
Jens Axboea6151c32009-10-07 20:02:57 +02003993 return false;
Jens Axboe22e2c502005-06-27 10:55:12 +02003994
3995 if (cfq_class_idle(cfqq))
Jens Axboea6151c32009-10-07 20:02:57 +02003996 return true;
Jens Axboe1e3335d2007-02-14 19:59:49 +01003997
Jens Axboe22e2c502005-06-27 10:55:12 +02003998 /*
Divyesh Shah875feb62010-01-06 18:58:20 -08003999 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
4000 */
4001 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
4002 return false;
4003
4004 /*
Jens Axboe374f84a2006-07-23 01:42:19 +02004005 * if the new request is sync, but the currently running queue is
4006 * not, let the sync request have priority.
4007 */
Glauber Costa3932a862016-09-22 20:59:59 -04004008 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
Jens Axboea6151c32009-10-07 20:02:57 +02004009 return true;
Jens Axboe1e3335d2007-02-14 19:59:49 +01004010
Jan Kara3984aa52016-01-12 16:24:19 +01004011 /*
4012 * Treat ancestors of current cgroup the same way as current cgroup.
4013 * For anybody else we disallow preemption to guarantee service
4014 * fairness among cgroups.
4015 */
4016 if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
Vivek Goyal8682e1f2009-12-03 12:59:50 -05004017 return false;
4018
4019 if (cfq_slice_used(cfqq))
4020 return true;
4021
Jan Kara6c80731c2016-01-12 16:24:16 +01004022 /*
4023 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
4024 */
4025 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
4026 return true;
4027
4028 WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
Vivek Goyal8682e1f2009-12-03 12:59:50 -05004029 /* Allow preemption only if we are idling on sync-noidle tree */
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04004030 if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
Vivek Goyal8682e1f2009-12-03 12:59:50 -05004031 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
Vivek Goyal8682e1f2009-12-03 12:59:50 -05004032 RB_EMPTY_ROOT(&cfqq->sort_list))
4033 return true;
4034
Jens Axboe374f84a2006-07-23 01:42:19 +02004035 /*
Jens Axboeb53d1ed2011-08-19 08:34:48 +02004036 * So both queues are sync. Let the new request get disk time if
4037 * it's a metadata request and the current queue is doing regular IO.
4038 */
Christoph Hellwig65299a32011-08-23 14:50:29 +02004039 if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
Jens Axboeb53d1ed2011-08-19 08:34:48 +02004040 return true;
4041
Shaohua Lid2d59e12010-11-08 15:01:03 +01004042 /* An idle queue should not be idle now for some reason */
4043 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
4044 return true;
4045
Jens Axboe1e3335d2007-02-14 19:59:49 +01004046 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
Jens Axboea6151c32009-10-07 20:02:57 +02004047 return false;
Jens Axboe1e3335d2007-02-14 19:59:49 +01004048
4049 /*
4050 * if this request is as-good as one we would expect from the
4051 * current cfqq, let it preempt
4052 */
Shaohua Lie9ce3352010-03-19 08:03:04 +01004053 if (cfq_rq_close(cfqd, cfqq, rq))
Jens Axboea6151c32009-10-07 20:02:57 +02004054 return true;
Jens Axboe1e3335d2007-02-14 19:59:49 +01004055
Jens Axboea6151c32009-10-07 20:02:57 +02004056 return false;
Jens Axboe22e2c502005-06-27 10:55:12 +02004057}
4058
4059/*
4060 * cfqq preempts the active queue. if we allowed preempt with no slice left,
4061 * let it have half of its nominal slice.
4062 */
4063static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4064{
Shaohua Lidf0793a2012-01-19 09:20:09 +01004065 enum wl_type_t old_type = cfqq_type(cfqd->active_queue);
4066
Jens Axboe7b679132008-05-30 12:23:07 +02004067 cfq_log_cfqq(cfqd, cfqq, "preempt");
Shaohua Lidf0793a2012-01-19 09:20:09 +01004068 cfq_slice_expired(cfqd, 1);
Jens Axboe22e2c502005-06-27 10:55:12 +02004069
Jens Axboebf572252006-07-19 20:29:12 +02004070 /*
Shaohua Lif8ae6e32011-01-14 08:41:02 +01004071 * workload type is changed, don't save slice, otherwise preempt
4072 * doesn't happen
4073 */
Shaohua Lidf0793a2012-01-19 09:20:09 +01004074 if (old_type != cfqq_type(cfqq))
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04004075 cfqq->cfqg->saved_wl_slice = 0;
Shaohua Lif8ae6e32011-01-14 08:41:02 +01004076
4077 /*
Jens Axboebf572252006-07-19 20:29:12 +02004078 * Put the new queue at the front of the of the current list,
4079 * so we know that it will be selected next.
4080 */
4081 BUG_ON(!cfq_cfqq_on_rr(cfqq));
Jens Axboeedd75ff2007-04-19 12:03:34 +02004082
4083 cfq_service_tree_add(cfqd, cfqq, 1);
Justin TerAvesteda5e0c2011-03-22 21:26:49 +01004084
Justin TerAvest62a37f62011-03-23 08:25:44 +01004085 cfqq->slice_end = 0;
4086 cfq_mark_cfqq_slice_new(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004087}
4088
4089/*
Jens Axboe5e705372006-07-13 12:39:25 +02004090 * Called when a new fs request (rq) is added (to cfqq). Check if there's
Jens Axboe22e2c502005-06-27 10:55:12 +02004091 * something we should do about it
4092 */
4093static void
Jens Axboe5e705372006-07-13 12:39:25 +02004094cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
4095 struct request *rq)
Jens Axboe22e2c502005-06-27 10:55:12 +02004096{
Tejun Heoc5869802011-12-14 00:33:41 +01004097 struct cfq_io_cq *cic = RQ_CIC(rq);
Jens Axboe12e9fdd2006-06-01 10:09:56 +02004098
Aaron Carroll45333d52008-08-26 15:52:36 +02004099 cfqd->rq_queued++;
Christoph Hellwig65299a32011-08-23 14:50:29 +02004100 if (rq->cmd_flags & REQ_PRIO)
4101 cfqq->prio_pending++;
Jens Axboe374f84a2006-07-23 01:42:19 +02004102
Shaohua Li383cd722011-07-12 14:24:35 +02004103 cfq_update_io_thinktime(cfqd, cfqq, cic);
Jeff Moyerb2c18e12009-10-23 17:14:49 -04004104 cfq_update_io_seektime(cfqd, cfqq, rq);
Jens Axboe9c2c38a2005-08-24 14:57:54 +02004105 cfq_update_idle_window(cfqd, cfqq, cic);
4106
Jeff Moyerb2c18e12009-10-23 17:14:49 -04004107 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004108
4109 if (cfqq == cfqd->active_queue) {
4110 /*
Jens Axboeb0291952009-04-07 11:38:31 +02004111 * Remember that we saw a request from this process, but
4112 * don't start queuing just yet. Otherwise we risk seeing lots
4113 * of tiny requests, because we disrupt the normal plugging
Jens Axboed6ceb252009-04-14 14:18:16 +02004114 * and merging. If the request is already larger than a single
4115 * page, let it rip immediately. For that case we assume that
Jens Axboe2d870722009-04-15 12:12:46 +02004116 * merging is already done. Ditto for a busy system that
4117 * has other work pending, don't risk delaying until the
4118 * idle timer unplug to continue working.
Jens Axboe22e2c502005-06-27 10:55:12 +02004119 */
Jens Axboed6ceb252009-04-14 14:18:16 +02004120 if (cfq_cfqq_wait_request(cfqq)) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004121 if (blk_rq_bytes(rq) > PAGE_SIZE ||
Jens Axboe2d870722009-04-15 12:12:46 +02004122 cfqd->busy_queues > 1) {
Divyesh Shah812df482010-04-08 21:15:35 -07004123 cfq_del_timer(cfqd, cfqq);
Gui Jianfeng554554f2009-12-10 09:38:39 +01004124 cfq_clear_cfqq_wait_request(cfqq);
Christoph Hellwig24ecfbe2011-04-18 11:41:33 +02004125 __blk_run_queue(cfqd->queue);
Divyesh Shaha11cdaa2010-04-13 19:59:17 +02004126 } else {
Tejun Heo155fead2012-04-01 14:38:44 -07004127 cfqg_stats_update_idle_time(cfqq->cfqg);
Vivek Goyalbf7919372009-12-03 12:59:37 -05004128 cfq_mark_cfqq_must_dispatch(cfqq);
Divyesh Shaha11cdaa2010-04-13 19:59:17 +02004129 }
Jens Axboed6ceb252009-04-14 14:18:16 +02004130 }
Jens Axboe5e705372006-07-13 12:39:25 +02004131 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
Jens Axboe22e2c502005-06-27 10:55:12 +02004132 /*
4133 * not the active queue - expire current slice if it is
4134 * idle and has expired it's mean thinktime or this new queue
Divyesh Shah3a9a3f62009-01-30 12:46:41 +01004135 * has some old slice time left and is of higher priority or
4136 * this new queue is RT and the current one is BE
Jens Axboe22e2c502005-06-27 10:55:12 +02004137 */
4138 cfq_preempt_queue(cfqd, cfqq);
Christoph Hellwig24ecfbe2011-04-18 11:41:33 +02004139 __blk_run_queue(cfqd->queue);
Jens Axboe22e2c502005-06-27 10:55:12 +02004140 }
4141}
4142
Jens Axboe165125e2007-07-24 09:28:11 +02004143static void cfq_insert_request(struct request_queue *q, struct request *rq)
Jens Axboe22e2c502005-06-27 10:55:12 +02004144{
Jens Axboeb4878f22005-10-20 16:42:29 +02004145 struct cfq_data *cfqd = q->elevator->elevator_data;
Jens Axboe5e705372006-07-13 12:39:25 +02004146 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004147
Jens Axboe7b679132008-05-30 12:23:07 +02004148 cfq_log_cfqq(cfqd, cfqq, "insert_request");
Tejun Heoabede6d2012-03-19 15:10:57 -07004149 cfq_init_prio_data(cfqq, RQ_CIC(rq));
Linus Torvalds1da177e2005-04-16 15:20:36 -07004150
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004151 rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
Jens Axboe22e2c502005-06-27 10:55:12 +02004152 list_add_tail(&rq->queuelist, &cfqq->fifo);
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +01004153 cfq_add_rq_rb(rq);
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004154 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
Tejun Heo155fead2012-04-01 14:38:44 -07004155 rq->cmd_flags);
Jens Axboe5e705372006-07-13 12:39:25 +02004156 cfq_rq_enqueued(cfqd, cfqq, rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004157}
4158
Aaron Carroll45333d52008-08-26 15:52:36 +02004159/*
4160 * Update hw_tag based on peak queue depth over 50 samples under
4161 * sufficient load.
4162 */
4163static void cfq_update_hw_tag(struct cfq_data *cfqd)
4164{
Shaohua Li1a1238a2009-10-27 08:46:23 +01004165 struct cfq_queue *cfqq = cfqd->active_queue;
4166
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004167 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
4168 cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
Corrado Zoccoloe459dd02009-11-26 10:02:57 +01004169
4170 if (cfqd->hw_tag == 1)
4171 return;
Aaron Carroll45333d52008-08-26 15:52:36 +02004172
4173 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004174 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
Aaron Carroll45333d52008-08-26 15:52:36 +02004175 return;
4176
Shaohua Li1a1238a2009-10-27 08:46:23 +01004177 /*
4178 * If active queue hasn't enough requests and can idle, cfq might not
4179 * dispatch sufficient requests to hardware. Don't zero hw_tag in this
4180 * case
4181 */
4182 if (cfqq && cfq_cfqq_idle_window(cfqq) &&
4183 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004184 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
Shaohua Li1a1238a2009-10-27 08:46:23 +01004185 return;
4186
Aaron Carroll45333d52008-08-26 15:52:36 +02004187 if (cfqd->hw_tag_samples++ < 50)
4188 return;
4189
Corrado Zoccoloe459dd02009-11-26 10:02:57 +01004190 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
Aaron Carroll45333d52008-08-26 15:52:36 +02004191 cfqd->hw_tag = 1;
4192 else
4193 cfqd->hw_tag = 0;
Aaron Carroll45333d52008-08-26 15:52:36 +02004194}
4195
Vivek Goyal7667aa02009-12-08 17:52:58 -05004196static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4197{
Tejun Heoc5869802011-12-14 00:33:41 +01004198 struct cfq_io_cq *cic = cfqd->active_cic;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004199 u64 now = ktime_get_ns();
Vivek Goyal7667aa02009-12-08 17:52:58 -05004200
Justin TerAvest02a8f012011-02-09 14:20:03 +01004201 /* If the queue already has requests, don't wait */
4202 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
4203 return false;
4204
Vivek Goyal7667aa02009-12-08 17:52:58 -05004205 /* If there are other queues in the group, don't wait */
4206 if (cfqq->cfqg->nr_cfqq > 1)
4207 return false;
4208
Shaohua Li7700fc42011-07-12 14:24:56 +02004209 /* the only queue in the group, but think time is big */
4210 if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
4211 return false;
4212
Vivek Goyal7667aa02009-12-08 17:52:58 -05004213 if (cfq_slice_used(cfqq))
4214 return true;
4215
4216 /* if slice left is less than think time, wait busy */
Shaohua Li383cd722011-07-12 14:24:35 +02004217 if (cic && sample_valid(cic->ttime.ttime_samples)
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004218 && (cfqq->slice_end - now < cic->ttime.ttime_mean))
Vivek Goyal7667aa02009-12-08 17:52:58 -05004219 return true;
4220
4221 /*
4222 * If think times is less than a jiffy than ttime_mean=0 and above
4223 * will not be true. It might happen that slice has not expired yet
4224 * but will expire soon (4-5 ns) during select_queue(). To cover the
4225 * case where think time is less than a jiffy, mark the queue wait
4226 * busy if only 1 jiffy is left in the slice.
4227 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004228 if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
Vivek Goyal7667aa02009-12-08 17:52:58 -05004229 return true;
4230
4231 return false;
4232}
4233
Jens Axboe165125e2007-07-24 09:28:11 +02004234static void cfq_completed_request(struct request_queue *q, struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004235{
Jens Axboe5e705372006-07-13 12:39:25 +02004236 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Jens Axboeb4878f22005-10-20 16:42:29 +02004237 struct cfq_data *cfqd = cfqq->cfqd;
Jens Axboe5380a102006-07-13 12:37:56 +02004238 const int sync = rq_is_sync(rq);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004239 u64 now = ktime_get_ns();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004240
Christoph Hellwiga2b80962016-11-01 07:40:09 -06004241 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq));
Linus Torvalds1da177e2005-04-16 15:20:36 -07004242
Aaron Carroll45333d52008-08-26 15:52:36 +02004243 cfq_update_hw_tag(cfqd);
4244
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004245 WARN_ON(!cfqd->rq_in_driver);
Jens Axboe6d048f52007-04-25 12:44:27 +02004246 WARN_ON(!cfqq->dispatched);
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004247 cfqd->rq_in_driver--;
Jens Axboe6d048f52007-04-25 12:44:27 +02004248 cfqq->dispatched--;
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004249 (RQ_CFQG(rq))->dispatched--;
Tejun Heo155fead2012-04-01 14:38:44 -07004250 cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004251 rq_io_start_time_ns(rq), rq->cmd_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004252
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004253 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
Jens Axboe3ed9a292007-04-23 08:33:33 +02004254
Vivek Goyal365722b2009-10-03 15:21:27 +02004255 if (sync) {
Vivek Goyal34b98d02012-10-03 16:56:58 -04004256 struct cfq_rb_root *st;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +02004257
Shaohua Li383cd722011-07-12 14:24:35 +02004258 RQ_CIC(rq)->ttime.last_end_request = now;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +02004259
4260 if (cfq_cfqq_on_rr(cfqq))
Vivek Goyal34b98d02012-10-03 16:56:58 -04004261 st = cfqq->service_tree;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +02004262 else
Vivek Goyal34b98d02012-10-03 16:56:58 -04004263 st = st_for(cfqq->cfqg, cfqq_class(cfqq),
4264 cfqq_type(cfqq));
4265
4266 st->ttime.last_end_request = now;
Jan Kara149321a2016-06-28 09:04:01 +02004267 /*
4268 * We have to do this check in jiffies since start_time is in
4269 * jiffies and it is not trivial to convert to ns. If
4270 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
4271 * will become problematic but so far we are fine (the default
4272 * is 128 ms).
4273 */
4274 if (!time_after(rq->start_time +
4275 nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
4276 jiffies))
Corrado Zoccolo573412b2009-12-06 11:48:52 +01004277 cfqd->last_delayed_sync = now;
Vivek Goyal365722b2009-10-03 15:21:27 +02004278 }
Jens Axboecaaa5f92006-06-16 11:23:00 +02004279
Shaohua Li7700fc42011-07-12 14:24:56 +02004280#ifdef CONFIG_CFQ_GROUP_IOSCHED
4281 cfqq->cfqg->ttime.last_end_request = now;
4282#endif
4283
Jens Axboecaaa5f92006-06-16 11:23:00 +02004284 /*
4285 * If this is the active queue, check if it needs to be expired,
4286 * or if we want to idle in case it has no pending requests.
4287 */
4288 if (cfqd->active_queue == cfqq) {
Jens Axboea36e71f2009-04-15 12:15:11 +02004289 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
4290
Jens Axboe44f7c162007-01-19 11:51:58 +11004291 if (cfq_cfqq_slice_new(cfqq)) {
4292 cfq_set_prio_slice(cfqd, cfqq);
4293 cfq_clear_cfqq_slice_new(cfqq);
4294 }
Vivek Goyalf75edf22009-12-03 12:59:53 -05004295
4296 /*
Vivek Goyal7667aa02009-12-08 17:52:58 -05004297 * Should we wait for next request to come in before we expire
4298 * the queue.
Vivek Goyalf75edf22009-12-03 12:59:53 -05004299 */
Vivek Goyal7667aa02009-12-08 17:52:58 -05004300 if (cfq_should_wait_busy(cfqd, cfqq)) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004301 u64 extend_sl = cfqd->cfq_slice_idle;
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004302 if (!cfqd->cfq_slice_idle)
4303 extend_sl = cfqd->cfq_group_idle;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004304 cfqq->slice_end = now + extend_sl;
Vivek Goyalf75edf22009-12-03 12:59:53 -05004305 cfq_mark_cfqq_wait_busy(cfqq);
Divyesh Shahb1ffe732010-03-25 15:45:03 +01004306 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
Vivek Goyalf75edf22009-12-03 12:59:53 -05004307 }
4308
Jens Axboea36e71f2009-04-15 12:15:11 +02004309 /*
Corrado Zoccolo8e550632009-11-26 10:02:58 +01004310 * Idling is not enabled on:
4311 * - expired queues
4312 * - idle-priority queues
4313 * - async queues
4314 * - queues with still some requests queued
4315 * - when there is a close cooperator
Jens Axboea36e71f2009-04-15 12:15:11 +02004316 */
Jens Axboe08717142008-01-28 11:38:15 +01004317 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
Vivek Goyale5ff0822010-04-26 19:25:11 +02004318 cfq_slice_expired(cfqd, 1);
Corrado Zoccolo8e550632009-11-26 10:02:58 +01004319 else if (sync && cfqq_empty &&
4320 !cfq_close_cooperator(cfqd, cfqq)) {
Corrado Zoccolo749ef9f2010-09-20 15:24:50 +02004321 cfq_arm_slice_timer(cfqd);
Corrado Zoccolo8e550632009-11-26 10:02:58 +01004322 }
Jens Axboecaaa5f92006-06-16 11:23:00 +02004323 }
Jens Axboe6d048f52007-04-25 12:44:27 +02004324
Corrado Zoccolo53c583d2010-02-28 19:45:05 +01004325 if (!cfqd->rq_in_driver)
Jens Axboe23e018a2009-10-05 08:52:35 +02004326 cfq_schedule_dispatch(cfqd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004327}
4328
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004329static void cfqq_boost_on_prio(struct cfq_queue *cfqq, unsigned int op)
Jens Axboeb8269db2016-06-09 15:47:29 -06004330{
4331 /*
4332 * If REQ_PRIO is set, boost class and prio level, if it's below
4333 * BE/NORM. If prio is not set, restore the potentially boosted
4334 * class/prio level.
4335 */
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004336 if (!(op & REQ_PRIO)) {
Jens Axboeb8269db2016-06-09 15:47:29 -06004337 cfqq->ioprio_class = cfqq->org_ioprio_class;
4338 cfqq->ioprio = cfqq->org_ioprio;
4339 } else {
4340 if (cfq_class_idle(cfqq))
4341 cfqq->ioprio_class = IOPRIO_CLASS_BE;
4342 if (cfqq->ioprio > IOPRIO_NORM)
4343 cfqq->ioprio = IOPRIO_NORM;
4344 }
4345}
4346
Jens Axboe89850f72006-07-22 16:48:31 +02004347static inline int __cfq_may_queue(struct cfq_queue *cfqq)
Jens Axboe22e2c502005-06-27 10:55:12 +02004348{
Jens Axboe1b379d82009-08-11 08:26:11 +02004349 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
Jens Axboe3b181522005-06-27 10:56:24 +02004350 cfq_mark_cfqq_must_alloc_slice(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004351 return ELV_MQUEUE_MUST;
Jens Axboe3b181522005-06-27 10:56:24 +02004352 }
Jens Axboe22e2c502005-06-27 10:55:12 +02004353
4354 return ELV_MQUEUE_MAY;
Jens Axboe22e2c502005-06-27 10:55:12 +02004355}
4356
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004357static int cfq_may_queue(struct request_queue *q, unsigned int op)
Jens Axboe22e2c502005-06-27 10:55:12 +02004358{
4359 struct cfq_data *cfqd = q->elevator->elevator_data;
4360 struct task_struct *tsk = current;
Tejun Heoc5869802011-12-14 00:33:41 +01004361 struct cfq_io_cq *cic;
Jens Axboe22e2c502005-06-27 10:55:12 +02004362 struct cfq_queue *cfqq;
4363
4364 /*
4365 * don't force setup of a queue from here, as a call to may_queue
4366 * does not necessarily imply that a request actually will be queued.
4367 * so just lookup a possibly existing queue, or return 'may queue'
4368 * if that fails
4369 */
Jens Axboe4ac845a2008-01-24 08:44:49 +01004370 cic = cfq_cic_lookup(cfqd, tsk->io_context);
Vasily Tarasov91fac312007-04-25 12:29:51 +02004371 if (!cic)
4372 return ELV_MQUEUE_MAY;
4373
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004374 cfqq = cic_to_cfqq(cic, op_is_sync(op));
Jens Axboe22e2c502005-06-27 10:55:12 +02004375 if (cfqq) {
Tejun Heoabede6d2012-03-19 15:10:57 -07004376 cfq_init_prio_data(cfqq, cic);
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004377 cfqq_boost_on_prio(cfqq, op);
Jens Axboe22e2c502005-06-27 10:55:12 +02004378
Jens Axboe89850f72006-07-22 16:48:31 +02004379 return __cfq_may_queue(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004380 }
4381
4382 return ELV_MQUEUE_MAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004383}
4384
Linus Torvalds1da177e2005-04-16 15:20:36 -07004385/*
4386 * queue lock held here
4387 */
Jens Axboebb37b942006-12-01 10:42:33 +01004388static void cfq_put_request(struct request *rq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004389{
Jens Axboe5e705372006-07-13 12:39:25 +02004390 struct cfq_queue *cfqq = RQ_CFQQ(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004391
Jens Axboe5e705372006-07-13 12:39:25 +02004392 if (cfqq) {
Jens Axboe22e2c502005-06-27 10:55:12 +02004393 const int rw = rq_data_dir(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004394
Jens Axboe22e2c502005-06-27 10:55:12 +02004395 BUG_ON(!cfqq->allocated[rw]);
4396 cfqq->allocated[rw]--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004397
Vivek Goyal7f1dc8a2010-04-21 17:44:16 +02004398 /* Put down rq reference on cfqg */
Tejun Heoeb7d8c072012-03-23 14:02:53 +01004399 cfqg_put(RQ_CFQG(rq));
Tejun Heoa612fdd2011-12-14 00:33:41 +01004400 rq->elv.priv[0] = NULL;
4401 rq->elv.priv[1] = NULL;
Vivek Goyal7f1dc8a2010-04-21 17:44:16 +02004402
Linus Torvalds1da177e2005-04-16 15:20:36 -07004403 cfq_put_queue(cfqq);
4404 }
4405}
4406
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04004407static struct cfq_queue *
Tejun Heoc5869802011-12-14 00:33:41 +01004408cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04004409 struct cfq_queue *cfqq)
4410{
4411 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
4412 cic_set_cfqq(cic, cfqq->new_cfqq, 1);
Jeff Moyerb3b6d042009-10-23 17:14:51 -04004413 cfq_mark_cfqq_coop(cfqq->new_cfqq);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04004414 cfq_put_queue(cfqq);
4415 return cic_to_cfqq(cic, 1);
4416}
4417
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004418/*
4419 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
4420 * was the last process referring to said cfqq.
4421 */
4422static struct cfq_queue *
Tejun Heoc5869802011-12-14 00:33:41 +01004423split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004424{
4425 if (cfqq_process_refs(cfqq) == 1) {
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004426 cfqq->pid = current->pid;
4427 cfq_clear_cfqq_coop(cfqq);
Shaohua Liae54abe2010-02-05 13:11:45 +01004428 cfq_clear_cfqq_split_coop(cfqq);
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004429 return cfqq;
4430 }
4431
4432 cic_set_cfqq(cic, NULL, 1);
Shaohua Lid02a2c02010-05-25 10:16:53 +02004433
4434 cfq_put_cooperator(cfqq);
4435
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004436 cfq_put_queue(cfqq);
4437 return NULL;
4438}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004439/*
Jens Axboe22e2c502005-06-27 10:55:12 +02004440 * Allocate cfq data structures associated with this request.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004441 */
Jens Axboe22e2c502005-06-27 10:55:12 +02004442static int
Tejun Heo852c7882012-03-05 13:15:27 -08004443cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
4444 gfp_t gfp_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004445{
4446 struct cfq_data *cfqd = q->elevator->elevator_data;
Tejun Heof1f8cc92011-12-14 00:33:42 +01004447 struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004448 const int rw = rq_data_dir(rq);
Jens Axboea6151c32009-10-07 20:02:57 +02004449 const bool is_sync = rq_is_sync(rq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004450 struct cfq_queue *cfqq;
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07004451 bool disable_wbt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004452
Tejun Heo216284c2011-12-14 00:33:38 +01004453 spin_lock_irq(q->queue_lock);
Tejun Heof1f8cc92011-12-14 00:33:42 +01004454
Tejun Heo598971b2012-03-19 15:10:58 -07004455 check_ioprio_changed(cic, bio);
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07004456 disable_wbt = check_blkcg_changed(cic, bio);
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004457new_queue:
Vasily Tarasov91fac312007-04-25 12:29:51 +02004458 cfqq = cic_to_cfqq(cic, is_sync);
Vivek Goyal32f2e802009-07-09 22:13:16 +02004459 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
Tejun Heobce61332015-08-18 14:54:59 -07004460 if (cfqq)
4461 cfq_put_queue(cfqq);
Tejun Heo2da8de02015-08-18 14:55:02 -07004462 cfqq = cfq_get_queue(cfqd, is_sync, cic, bio);
Vasily Tarasov91fac312007-04-25 12:29:51 +02004463 cic_set_cfqq(cic, cfqq, is_sync);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04004464 } else {
4465 /*
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004466 * If the queue was seeky for too long, break it apart.
4467 */
Shaohua Liae54abe2010-02-05 13:11:45 +01004468 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
Jeff Moyere6c5bc72009-10-23 17:14:52 -04004469 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
4470 cfqq = split_cfqq(cic, cfqq);
4471 if (!cfqq)
4472 goto new_queue;
4473 }
4474
4475 /*
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04004476 * Check to see if this queue is scheduled to merge with
4477 * another, closely cooperating queue. The merging of
4478 * queues happens here as it must be done in process context.
4479 * The reference on new_cfqq was taken in merge_cfqqs.
4480 */
4481 if (cfqq->new_cfqq)
4482 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
Vasily Tarasov91fac312007-04-25 12:29:51 +02004483 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004484
4485 cfqq->allocated[rw]++;
Jens Axboe5e705372006-07-13 12:39:25 +02004486
Jens Axboe6fae9c22011-03-01 15:04:39 -05004487 cfqq->ref++;
Tejun Heoeb7d8c072012-03-23 14:02:53 +01004488 cfqg_get(cfqq->cfqg);
Tejun Heoa612fdd2011-12-14 00:33:41 +01004489 rq->elv.priv[0] = cfqq;
Tejun Heo1adaf3d2012-03-05 13:15:15 -08004490 rq->elv.priv[1] = cfqq->cfqg;
Tejun Heo216284c2011-12-14 00:33:38 +01004491 spin_unlock_irq(q->queue_lock);
Jens Axboe5d7f5ce2017-02-16 07:57:33 -07004492
4493 if (disable_wbt)
4494 wbt_disable_default(q);
4495
Jens Axboe5e705372006-07-13 12:39:25 +02004496 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004497}
4498
David Howells65f27f32006-11-22 14:55:48 +00004499static void cfq_kick_queue(struct work_struct *work)
Jens Axboe22e2c502005-06-27 10:55:12 +02004500{
David Howells65f27f32006-11-22 14:55:48 +00004501 struct cfq_data *cfqd =
Jens Axboe23e018a2009-10-05 08:52:35 +02004502 container_of(work, struct cfq_data, unplug_work);
Jens Axboe165125e2007-07-24 09:28:11 +02004503 struct request_queue *q = cfqd->queue;
Jens Axboe22e2c502005-06-27 10:55:12 +02004504
Jens Axboe40bb54d2009-04-15 12:11:10 +02004505 spin_lock_irq(q->queue_lock);
Christoph Hellwig24ecfbe2011-04-18 11:41:33 +02004506 __blk_run_queue(cfqd->queue);
Jens Axboe40bb54d2009-04-15 12:11:10 +02004507 spin_unlock_irq(q->queue_lock);
Jens Axboe22e2c502005-06-27 10:55:12 +02004508}
4509
4510/*
4511 * Timer running if the active_queue is currently idling inside its time slice
4512 */
Jan Kara91148322016-06-08 15:11:39 +02004513static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
Jens Axboe22e2c502005-06-27 10:55:12 +02004514{
Jan Kara91148322016-06-08 15:11:39 +02004515 struct cfq_data *cfqd = container_of(timer, struct cfq_data,
4516 idle_slice_timer);
Jens Axboe22e2c502005-06-27 10:55:12 +02004517 struct cfq_queue *cfqq;
4518 unsigned long flags;
Jens Axboe3c6bd2f2007-01-19 12:06:33 +11004519 int timed_out = 1;
Jens Axboe22e2c502005-06-27 10:55:12 +02004520
Jens Axboe7b679132008-05-30 12:23:07 +02004521 cfq_log(cfqd, "idle timer fired");
4522
Jens Axboe22e2c502005-06-27 10:55:12 +02004523 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
4524
Jens Axboefe094d92008-01-31 13:08:54 +01004525 cfqq = cfqd->active_queue;
4526 if (cfqq) {
Jens Axboe3c6bd2f2007-01-19 12:06:33 +11004527 timed_out = 0;
4528
Jens Axboe22e2c502005-06-27 10:55:12 +02004529 /*
Jens Axboeb0291952009-04-07 11:38:31 +02004530 * We saw a request before the queue expired, let it through
4531 */
4532 if (cfq_cfqq_must_dispatch(cfqq))
4533 goto out_kick;
4534
4535 /*
Jens Axboe22e2c502005-06-27 10:55:12 +02004536 * expired
4537 */
Jens Axboe44f7c162007-01-19 11:51:58 +11004538 if (cfq_slice_used(cfqq))
Jens Axboe22e2c502005-06-27 10:55:12 +02004539 goto expire;
4540
4541 /*
4542 * only expire and reinvoke request handler, if there are
4543 * other queues with pending requests
4544 */
Jens Axboecaaa5f92006-06-16 11:23:00 +02004545 if (!cfqd->busy_queues)
Jens Axboe22e2c502005-06-27 10:55:12 +02004546 goto out_cont;
Jens Axboe22e2c502005-06-27 10:55:12 +02004547
4548 /*
4549 * not expired and it has a request pending, let it dispatch
4550 */
Jens Axboe75e50982009-04-07 08:56:14 +02004551 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
Jens Axboe22e2c502005-06-27 10:55:12 +02004552 goto out_kick;
Corrado Zoccolo76280af2009-11-26 10:02:58 +01004553
4554 /*
4555 * Queue depth flag is reset only when the idle didn't succeed
4556 */
4557 cfq_clear_cfqq_deep(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004558 }
4559expire:
Vivek Goyale5ff0822010-04-26 19:25:11 +02004560 cfq_slice_expired(cfqd, timed_out);
Jens Axboe22e2c502005-06-27 10:55:12 +02004561out_kick:
Jens Axboe23e018a2009-10-05 08:52:35 +02004562 cfq_schedule_dispatch(cfqd);
Jens Axboe22e2c502005-06-27 10:55:12 +02004563out_cont:
4564 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
Jan Kara91148322016-06-08 15:11:39 +02004565 return HRTIMER_NORESTART;
Jens Axboe22e2c502005-06-27 10:55:12 +02004566}
4567
/*
 * Stop the deferred activity owned by @cfqd: first cancel the idle slice
 * hrtimer, then cancel the unplug work item.
 */
Jens Axboe3b181522005-06-27 10:56:24 +02004568static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
4569{
Jan Kara91148322016-06-08 15:11:39 +02004570 hrtimer_cancel(&cfqd->idle_slice_timer);
Jens Axboe23e018a2009-10-05 08:52:35 +02004571 cancel_work_sync(&cfqd->unplug_work);
Jens Axboe3b181522005-06-27 10:56:24 +02004572}
Jens Axboe22e2c502005-06-27 10:55:12 +02004573
Jens Axboeb374d182008-10-31 10:05:07 +01004574static void cfq_exit_queue(struct elevator_queue *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004575{
Jens Axboe22e2c502005-06-27 10:55:12 +02004576 struct cfq_data *cfqd = e->elevator_data;
Jens Axboe165125e2007-07-24 09:28:11 +02004577 struct request_queue *q = cfqd->queue;
Jens Axboe22e2c502005-06-27 10:55:12 +02004578
Jens Axboe3b181522005-06-27 10:56:24 +02004579 cfq_shutdown_timer_wq(cfqd);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004580
Al Virod9ff4182006-03-18 13:51:22 -05004581 spin_lock_irq(q->queue_lock);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004582
Al Virod9ff4182006-03-18 13:51:22 -05004583 if (cfqd->active_queue)
Vivek Goyale5ff0822010-04-26 19:25:11 +02004584 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004585
Tejun Heo03aa2642012-03-05 13:15:19 -08004586 spin_unlock_irq(q->queue_lock);
4587
Al Viroa90d7422006-03-18 12:05:37 -05004588 cfq_shutdown_timer_wq(cfqd);
4589
Tejun Heoffea73f2012-06-04 10:02:29 +02004590#ifdef CONFIG_CFQ_GROUP_IOSCHED
4591 blkcg_deactivate_policy(q, &blkcg_policy_cfq);
4592#else
Tejun Heof51b8022012-03-05 13:15:05 -08004593 kfree(cfqd->root_group);
Vivek Goyal2abae552011-05-23 10:02:19 +02004594#endif
Vivek Goyal56edf7d2011-05-19 15:38:22 -04004595 kfree(cfqd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004596}
4597
Jianpeng Mad50235b2013-07-03 13:25:24 +02004598static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004599{
4600 struct cfq_data *cfqd;
Tejun Heo3c798392012-04-16 13:57:25 -07004601 struct blkcg_gq *blkg __maybe_unused;
Tejun Heoa2b16932012-04-13 13:11:33 -07004602 int i, ret;
Jianpeng Mad50235b2013-07-03 13:25:24 +02004603 struct elevator_queue *eq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004604
Jianpeng Mad50235b2013-07-03 13:25:24 +02004605 eq = elevator_alloc(q, e);
4606 if (!eq)
Tejun Heob2fab5a2012-03-05 13:14:57 -08004607 return -ENOMEM;
Konstantin Khlebnikov80b15c72010-05-20 23:21:41 +04004608
Joe Perchesc1b511e2013-08-29 15:21:42 -07004609 cfqd = kzalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
Jianpeng Mad50235b2013-07-03 13:25:24 +02004610 if (!cfqd) {
4611 kobject_put(&eq->kobj);
4612 return -ENOMEM;
4613 }
4614 eq->elevator_data = cfqd;
4615
Tejun Heof51b8022012-03-05 13:15:05 -08004616 cfqd->queue = q;
Jianpeng Mad50235b2013-07-03 13:25:24 +02004617 spin_lock_irq(q->queue_lock);
4618 q->elevator = eq;
4619 spin_unlock_irq(q->queue_lock);
Tejun Heof51b8022012-03-05 13:15:05 -08004620
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05004621 /* Init root service tree */
4622 cfqd->grp_service_tree = CFQ_RB_ROOT;
4623
Tejun Heof51b8022012-03-05 13:15:05 -08004624 /* Init root group and prefer root group over other groups by default */
Vivek Goyal25fb5162009-12-03 12:59:46 -05004625#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004626 ret = blkcg_activate_policy(q, &blkcg_policy_cfq);
Tejun Heoa2b16932012-04-13 13:11:33 -07004627 if (ret)
4628 goto out_free;
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004629
Tejun Heoa2b16932012-04-13 13:11:33 -07004630 cfqd->root_group = blkg_to_cfqg(q->root_blkg);
Tejun Heof51b8022012-03-05 13:15:05 -08004631#else
Tejun Heoa2b16932012-04-13 13:11:33 -07004632 ret = -ENOMEM;
Tejun Heof51b8022012-03-05 13:15:05 -08004633 cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
4634 GFP_KERNEL, cfqd->queue->node);
Tejun Heoa2b16932012-04-13 13:11:33 -07004635 if (!cfqd->root_group)
4636 goto out_free;
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004637
Tejun Heoa2b16932012-04-13 13:11:33 -07004638 cfq_init_cfqg_base(cfqd->root_group);
Tejun Heo3ecca622015-08-18 14:55:35 -07004639 cfqd->root_group->weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
4640 cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
Tejun Heo69d7fde2015-08-18 14:55:36 -07004641#endif
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004642
Jens Axboe26a2ac02009-04-23 12:13:27 +02004643 /*
4644 * Not strictly needed (since RB_ROOT just clears the node and we
4645 * zeroed cfqd on alloc), but better be safe in case someone decides
4646 * to add magic to the rb code
4647 */
4648 for (i = 0; i < CFQ_PRIO_LISTS; i++)
4649 cfqd->prio_trees[i] = RB_ROOT;
4650
Jens Axboe6118b702009-06-30 09:34:12 +02004651 /*
Tejun Heod4aad7f2015-08-18 14:55:04 -07004652 * Our fallback cfqq if cfq_get_queue() runs into OOM issues.
Jens Axboe6118b702009-06-30 09:34:12 +02004653 * Grab a permanent reference to it, so that the normal code flow
Tejun Heof51b8022012-03-05 13:15:05 -08004654 * will not attempt to free it. oom_cfqq is linked to root_group
4655 * but shouldn't hold a reference as it'll never be unlinked. Lose
4656 * the reference from linking right away.
Jens Axboe6118b702009-06-30 09:34:12 +02004657 */
4658 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
Shaohua Li30d7b942011-01-07 08:46:59 +01004659 cfqd->oom_cfqq.ref++;
Tejun Heo1adaf3d2012-03-05 13:15:15 -08004660
4661 spin_lock_irq(q->queue_lock);
Tejun Heof51b8022012-03-05 13:15:05 -08004662 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
Tejun Heoeb7d8c072012-03-23 14:02:53 +01004663 cfqg_put(cfqd->root_group);
Tejun Heo1adaf3d2012-03-05 13:15:15 -08004664 spin_unlock_irq(q->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004665
Jan Kara91148322016-06-08 15:11:39 +02004666 hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
4667 HRTIMER_MODE_REL);
Jens Axboe22e2c502005-06-27 10:55:12 +02004668 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
Jens Axboe22e2c502005-06-27 10:55:12 +02004669
Jens Axboe23e018a2009-10-05 08:52:35 +02004670 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
Jens Axboe22e2c502005-06-27 10:55:12 +02004671
Linus Torvalds1da177e2005-04-16 15:20:36 -07004672 cfqd->cfq_quantum = cfq_quantum;
Jens Axboe22e2c502005-06-27 10:55:12 +02004673 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
4674 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07004675 cfqd->cfq_back_max = cfq_back_max;
4676 cfqd->cfq_back_penalty = cfq_back_penalty;
Jens Axboe22e2c502005-06-27 10:55:12 +02004677 cfqd->cfq_slice[0] = cfq_slice_async;
4678 cfqd->cfq_slice[1] = cfq_slice_sync;
Tao Ma5bf14c02012-04-01 14:33:39 -07004679 cfqd->cfq_target_latency = cfq_target_latency;
Jens Axboe22e2c502005-06-27 10:55:12 +02004680 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
Jens Axboe0bb97942015-06-10 08:01:20 -06004681 cfqd->cfq_slice_idle = cfq_slice_idle;
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004682 cfqd->cfq_group_idle = cfq_group_idle;
Jens Axboe963b72f2009-10-03 19:42:18 +02004683 cfqd->cfq_latency = 1;
Corrado Zoccoloe459dd02009-11-26 10:02:57 +01004684 cfqd->hw_tag = -1;
Corrado Zoccoloedc71132009-12-09 20:56:04 +01004685 /*
4686 * we optimistically start assuming sync ops weren't delayed in last
4687 * second, in order to have larger depth for async operations.
4688 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004689 cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
Tejun Heob2fab5a2012-03-05 13:14:57 -08004690 return 0;
Tejun Heoa2b16932012-04-13 13:11:33 -07004691
4692out_free:
4693 kfree(cfqd);
Jianpeng Mad50235b2013-07-03 13:25:24 +02004694 kobject_put(&eq->kobj);
Tejun Heoa2b16932012-04-13 13:11:33 -07004695 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004696}
4697
Jens Axboe0bb97942015-06-10 08:01:20 -06004698static void cfq_registered_queue(struct request_queue *q)
4699{
4700 struct elevator_queue *e = q->elevator;
4701 struct cfq_data *cfqd = e->elevator_data;
4702
4703 /*
4704 * Default to IOPS mode with no idling for SSDs
4705 */
4706 if (blk_queue_nonrot(q))
4707 cfqd->cfq_slice_idle = 0;
4708}
4709
Linus Torvalds1da177e2005-04-16 15:20:36 -07004710/*
4711 * sysfs parts below -->
4712 */
/*
 * Format @var into @page as an unsigned decimal number followed by a
 * newline.  Returns the number of characters written, as reported by
 * sprintf().
 *
 * @var is 64 bits wide because the sysfs show helpers pass in u64 values
 * (nanoseconds scaled down to ms or us); a narrower parameter silently
 * truncated anything above UINT_MAX.
 */
static ssize_t
cfq_var_show(unsigned long long var, char *page)
{
	return sprintf(page, "%llu\n", var);
}
4718
4719static ssize_t
4720cfq_var_store(unsigned int *var, const char *page, size_t count)
4721{
4722 char *p = (char *) page;
4723
4724 *var = simple_strtoul(p, &p, 10);
4725 return count;
4726}
4727
/*
 * SHOW_FUNCTION - generate a sysfs "show" handler called __FUNC.
 *
 * The handler reads __VAR (an expression that may use the local "cfqd")
 * into a u64, divides it by NSEC_PER_MSEC when __CONV is non-zero, and
 * prints the result through cfq_var_show().
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004728#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
Jens Axboeb374d182008-10-31 10:05:07 +01004729static ssize_t __FUNC(struct elevator_queue *e, char *page) \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004730{ \
Al Viro3d1ab402006-03-18 18:35:43 -05004731 struct cfq_data *cfqd = e->elevator_data; \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004732 u64 __data = __VAR; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004733 if (__CONV) \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004734 __data = div_u64(__data, NSEC_PER_MSEC); \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004735 return cfq_var_show(__data, (page)); \
4736}
4737SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004738SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
4739SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
Al Viroe572ec72006-03-18 22:27:18 -05004740SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
4741SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004742SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004743SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1);
Jens Axboe22e2c502005-06-27 10:55:12 +02004744SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
4745SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
4746SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
Jens Axboe963b72f2009-10-03 19:42:18 +02004747SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
Tao Ma5bf14c02012-04-01 14:33:39 -07004748SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004749#undef SHOW_FUNCTION
4750
/*
 * USEC_SHOW_FUNCTION - generate a sysfs "show" handler called __FUNC that
 * reports __VAR divided by NSEC_PER_USEC, printed through cfq_var_show().
 * __VAR may use the local "cfqd".
 */
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004751#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
4752static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4753{ \
4754 struct cfq_data *cfqd = e->elevator_data; \
4755 u64 __data = __VAR; \
4756 __data = div_u64(__data, NSEC_PER_USEC); \
4757 return cfq_var_show(__data, (page)); \
4758}
4759USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
4760USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
4761USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
4762USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
4763USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
4764#undef USEC_SHOW_FUNCTION
4765
/*
 * STORE_FUNCTION - generate a sysfs "store" handler called __FUNC.
 *
 * The handler parses the written text with cfq_var_store(), clamps the
 * value to the range [MIN, MAX], and writes it through __PTR (which may
 * use the local "cfqd").  When __CONV is non-zero the clamped value is
 * multiplied by NSEC_PER_MSEC before it is stored.  The return value is
 * whatever cfq_var_store() returned.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004766#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
Jens Axboeb374d182008-10-31 10:05:07 +01004767static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004768{ \
Al Viro3d1ab402006-03-18 18:35:43 -05004769 struct cfq_data *cfqd = e->elevator_data; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004770 unsigned int __data; \
4771 int ret = cfq_var_store(&__data, (page), count); \
4772 if (__data < (MIN)) \
4773 __data = (MIN); \
4774 else if (__data > (MAX)) \
4775 __data = (MAX); \
4776 if (__CONV) \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004777 *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004778 else \
4779 *(__PTR) = __data; \
4780 return ret; \
4781}
4782STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
Jens Axboefe094d92008-01-31 13:08:54 +01004783STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
4784 UINT_MAX, 1);
4785STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
4786 UINT_MAX, 1);
Al Viroe572ec72006-03-18 22:27:18 -05004787STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
Jens Axboefe094d92008-01-31 13:08:54 +01004788STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
4789 UINT_MAX, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004790STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004791STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1);
Jens Axboe22e2c502005-06-27 10:55:12 +02004792STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
4793STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
Jens Axboefe094d92008-01-31 13:08:54 +01004794STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
4795 UINT_MAX, 0);
Jens Axboe963b72f2009-10-03 19:42:18 +02004796STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
Tao Ma5bf14c02012-04-01 14:33:39 -07004797STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004798#undef STORE_FUNCTION
4799
/*
 * USEC_STORE_FUNCTION - generate a sysfs "store" handler called __FUNC.
 *
 * The handler parses the written text with cfq_var_store(), clamps the
 * value to [MIN, MAX], multiplies it by NSEC_PER_USEC and writes the
 * product through __PTR (which may use the local "cfqd").  The return
 * value is whatever cfq_var_store() returned.
 */
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004800#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
4801static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4802{ \
4803 struct cfq_data *cfqd = e->elevator_data; \
4804 unsigned int __data; \
4805 int ret = cfq_var_store(&__data, (page), count); \
4806 if (__data < (MIN)) \
4807 __data = (MIN); \
4808 else if (__data > (MAX)) \
4809 __data = (MAX); \
4810 *(__PTR) = (u64)__data * NSEC_PER_USEC; \
4811 return ret; \
4812}
4813USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
4814USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
4815USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
4816USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
4817USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
4818#undef USEC_STORE_FUNCTION
4819
/*
 * CFQ_ATTR(name) builds one attribute entry with mode S_IRUGO|S_IWUSR whose
 * handlers are cfq_<name>_show and cfq_<name>_store.
 */
Al Viroe572ec72006-03-18 22:27:18 -05004820#define CFQ_ATTR(name) \
4821 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
Jens Axboe3b181522005-06-27 10:56:24 +02004822
/*
 * Attribute table for the scheduler, referenced from
 * iosched_cfq.elevator_attrs.  The list is terminated by __ATTR_NULL.
 */
Al Viroe572ec72006-03-18 22:27:18 -05004823static struct elv_fs_entry cfq_attrs[] = {
4824 CFQ_ATTR(quantum),
Al Viroe572ec72006-03-18 22:27:18 -05004825 CFQ_ATTR(fifo_expire_sync),
4826 CFQ_ATTR(fifo_expire_async),
4827 CFQ_ATTR(back_seek_max),
4828 CFQ_ATTR(back_seek_penalty),
4829 CFQ_ATTR(slice_sync),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004830 CFQ_ATTR(slice_sync_us),
Al Viroe572ec72006-03-18 22:27:18 -05004831 CFQ_ATTR(slice_async),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004832 CFQ_ATTR(slice_async_us),
Al Viroe572ec72006-03-18 22:27:18 -05004833 CFQ_ATTR(slice_async_rq),
4834 CFQ_ATTR(slice_idle),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004835 CFQ_ATTR(slice_idle_us),
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004836 CFQ_ATTR(group_idle),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004837 CFQ_ATTR(group_idle_us),
Jens Axboe963b72f2009-10-03 19:42:18 +02004838 CFQ_ATTR(low_latency),
Tao Ma5bf14c02012-04-01 14:33:39 -07004839 CFQ_ATTR(target_latency),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004840 CFQ_ATTR(target_latency_us),
Al Viroe572ec72006-03-18 22:27:18 -05004841 __ATTR_NULL
Linus Torvalds1da177e2005-04-16 15:20:36 -07004842};
4843
/*
 * Elevator type descriptor for the "cfq" scheduler; registered by
 * cfq_init() and unregistered by cfq_exit().  It wires the single-queue
 * (.ops.sq) hooks to the cfq_* implementations, declares the size and
 * alignment of struct cfq_io_cq, and exposes cfq_attrs as its attributes.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004844static struct elevator_type iosched_cfq = {
Jens Axboec51ca6c2016-12-10 15:13:59 -07004845 .ops.sq = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004846 .elevator_merge_fn = cfq_merge,
4847 .elevator_merged_fn = cfq_merged_request,
4848 .elevator_merge_req_fn = cfq_merged_requests,
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07004849 .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
4850 .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
Divyesh Shah812d4022010-04-08 21:14:23 -07004851 .elevator_bio_merged_fn = cfq_bio_merged,
Jens Axboeb4878f22005-10-20 16:42:29 +02004852 .elevator_dispatch_fn = cfq_dispatch_requests,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004853 .elevator_add_req_fn = cfq_insert_request,
Jens Axboeb4878f22005-10-20 16:42:29 +02004854 .elevator_activate_req_fn = cfq_activate_request,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004855 .elevator_deactivate_req_fn = cfq_deactivate_request,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004856 .elevator_completed_req_fn = cfq_completed_request,
Jens Axboe21183b02006-07-13 12:33:14 +02004857 .elevator_former_req_fn = elv_rb_former_request,
4858 .elevator_latter_req_fn = elv_rb_latter_request,
Tejun Heo9b84cac2011-12-14 00:33:42 +01004859 .elevator_init_icq_fn = cfq_init_icq,
Tejun Heo7e5a8792011-12-14 00:33:42 +01004860 .elevator_exit_icq_fn = cfq_exit_icq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004861 .elevator_set_req_fn = cfq_set_request,
4862 .elevator_put_req_fn = cfq_put_request,
4863 .elevator_may_queue_fn = cfq_may_queue,
4864 .elevator_init_fn = cfq_init_queue,
4865 .elevator_exit_fn = cfq_exit_queue,
Jens Axboe0bb97942015-06-10 08:01:20 -06004866 .elevator_registered_fn = cfq_registered_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004867 },
Tejun Heo3d3c2372011-12-14 00:33:42 +01004868 .icq_size = sizeof(struct cfq_io_cq),
4869 .icq_align = __alignof__(struct cfq_io_cq),
Al Viro3d1ab402006-03-18 18:35:43 -05004870 .elevator_attrs = cfq_attrs,
Tejun Heo3d3c2372011-12-14 00:33:42 +01004871 .elevator_name = "cfq",
Linus Torvalds1da177e2005-04-16 15:20:36 -07004872 .elevator_owner = THIS_MODULE,
4873};
4874
/*
 * blkcg policy descriptor for CFQ, only built with
 * CONFIG_CFQ_GROUP_IOSCHED.  It lists the cgroup control files (default
 * and legacy) and the cfq_cpd_* and cfq_pd_* callbacks; cfq_init()
 * registers it and cfq_init_queue() activates it per queue.
 */
Vivek Goyal3e252062009-12-04 10:36:42 -05004875#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004876static struct blkcg_policy blkcg_policy_cfq = {
Tejun Heo2ee867dc2015-08-18 14:55:34 -07004877 .dfl_cftypes = cfq_blkcg_files,
Tejun Heo880f50e2015-08-18 14:55:30 -07004878 .legacy_cftypes = cfq_blkcg_legacy_files,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004879
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004880 .cpd_alloc_fn = cfq_cpd_alloc,
Arianna Avanzinie48453c2015-06-05 23:38:42 +02004881 .cpd_init_fn = cfq_cpd_init,
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004882 .cpd_free_fn = cfq_cpd_free,
Tejun Heo69d7fde2015-08-18 14:55:36 -07004883 .cpd_bind_fn = cfq_cpd_bind,
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004884
Tejun Heo001bea72015-08-18 14:55:11 -07004885 .pd_alloc_fn = cfq_pd_alloc,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004886 .pd_init_fn = cfq_pd_init,
Tejun Heo0b399202013-01-09 08:05:13 -08004887 .pd_offline_fn = cfq_pd_offline,
Tejun Heo001bea72015-08-18 14:55:11 -07004888 .pd_free_fn = cfq_pd_free,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004889 .pd_reset_stats_fn = cfq_pd_reset_stats,
Vivek Goyal3e252062009-12-04 10:36:42 -05004890};
Vivek Goyal3e252062009-12-04 10:36:42 -05004891#endif
4892
/*
 * Module init.  In order:
 *   1. with CONFIG_CFQ_GROUP_IOSCHED, register the blkcg policy; without
 *      it, force cfq_group_idle to 0;
 *   2. create the cfq_queue slab cache (cfq_pool);
 *   3. register the elevator type.
 * Returns 0 on success or a negative error code; on failure the steps
 * already completed are undone in reverse order.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004893static int __init cfq_init(void)
4894{
Tejun Heo3d3c2372011-12-14 00:33:42 +01004895 int ret;
4896
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004897#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004898 ret = blkcg_policy_register(&blkcg_policy_cfq);
Tejun Heo8bd435b2012-04-13 13:11:28 -07004899 if (ret)
4900 return ret;
Tejun Heoffea73f2012-06-04 10:02:29 +02004901#else
4902 cfq_group_idle = 0;
4903#endif
Tejun Heo8bd435b2012-04-13 13:11:28 -07004904
/* preset the error code for the slab cache creation below */
Tejun Heofd794952012-06-04 10:01:38 +02004905 ret = -ENOMEM;
Tejun Heo3d3c2372011-12-14 00:33:42 +01004906 cfq_pool = KMEM_CACHE(cfq_queue, 0);
4907 if (!cfq_pool)
Tejun Heo8bd435b2012-04-13 13:11:28 -07004908 goto err_pol_unreg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004909
Tejun Heo3d3c2372011-12-14 00:33:42 +01004910 ret = elv_register(&iosched_cfq);
Tejun Heo8bd435b2012-04-13 13:11:28 -07004911 if (ret)
4912 goto err_free_pool;
Tejun Heo3d3c2372011-12-14 00:33:42 +01004913
Adrian Bunk2fdd82b2007-12-12 18:51:56 +01004914 return 0;
Tejun Heo8bd435b2012-04-13 13:11:28 -07004915
4916err_free_pool:
4917 kmem_cache_destroy(cfq_pool);
4918err_pol_unreg:
Tejun Heoffea73f2012-06-04 10:02:29 +02004919#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004920 blkcg_policy_unregister(&blkcg_policy_cfq);
Tejun Heoffea73f2012-06-04 10:02:29 +02004921#endif
Tejun Heo8bd435b2012-04-13 13:11:28 -07004922 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004923}
4924
/*
 * Module exit: unregister the blkcg policy (when CONFIG_CFQ_GROUP_IOSCHED
 * is set), unregister the elevator type, then destroy the cfq_queue slab
 * cache.
 */
4925static void __exit cfq_exit(void)
4926{
Tejun Heoffea73f2012-06-04 10:02:29 +02004927#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004928 blkcg_policy_unregister(&blkcg_policy_cfq);
Tejun Heoffea73f2012-06-04 10:02:29 +02004929#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07004930 elv_unregister(&iosched_cfq);
Tejun Heo3d3c2372011-12-14 00:33:42 +01004931 kmem_cache_destroy(cfq_pool);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004932}
4933
/* Module entry/exit points and module metadata. */
4934module_init(cfq_init);
4935module_exit(cfq_exit);
4936
4937MODULE_AUTHOR("Jens Axboe");
4938MODULE_LICENSE("GPL");
4939MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");