/*
 * CFQ, or complete fairness queueing, disk scheduler.
 *
 * Based on ideas from a previously unfinished io
 * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-wbt.h"
21/*
22 * tunables
23 */
Jens Axboefe094d92008-01-31 13:08:54 +010024/* max queue in one round of service */
Shaohua Liabc3c742010-03-01 09:20:54 +010025static const int cfq_quantum = 8;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060026static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
Jens Axboefe094d92008-01-31 13:08:54 +010027/* maximum backwards seek, in KiB */
28static const int cfq_back_max = 16 * 1024;
29/* penalty of a backwards seek */
30static const int cfq_back_penalty = 2;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060031static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
32static u64 cfq_slice_async = NSEC_PER_SEC / 25;
Arjan van de Ven64100092006-01-06 09:46:02 +010033static const int cfq_slice_async_rq = 2;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060034static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
35static u64 cfq_group_idle = NSEC_PER_SEC / 125;
36static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
Corrado Zoccolo5db5d642009-10-26 22:44:04 +010037static const int cfq_hist_divisor = 4;
Jens Axboe22e2c502005-06-27 10:55:12 +020038
/*
 * offset from end of service tree
 */
#define CFQ_IDLE_DELAY		(NSEC_PER_SEC / 5)

/*
 * below this threshold, we consider thinktime immediate
 */
#define CFQ_MIN_TT		(2 * NSEC_PER_SEC / HZ)

#define CFQ_SLICE_SCALE		(5)
#define CFQ_HW_QUEUE_MIN	(5)
#define CFQ_SERVICE_SHIFT       12

/* seek/close thresholds, in sectors */
#define CFQQ_SEEK_THR		(sector_t)(8 * 100)
#define CFQQ_CLOSE_THR		(sector_t)(8 * 1024)
#define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
/* a queue is "seeky" if more than 1/8 of its recent requests were seeks */
#define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
/* accessors for the cfq state hung off a struct request's elevator data */
#define RQ_CIC(rq)		icq_to_cic((rq)->elv.icq)
#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elv.priv[0])
#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])

/* slab cache for struct cfq_queue allocations */
static struct kmem_cache *cfq_pool;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

#define sample_valid(samples)	((samples) > 80)
#define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)

/* blkio-related constants */
#define CFQ_WEIGHT_LEGACY_MIN	10
#define CFQ_WEIGHT_LEGACY_DFL	500
#define CFQ_WEIGHT_LEGACY_MAX	1000
Tejun Heoc5869802011-12-14 00:33:41 +010076struct cfq_ttime {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060077 u64 last_end_request;
Tejun Heoc5869802011-12-14 00:33:41 +010078
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060079 u64 ttime_total;
80 u64 ttime_mean;
Tejun Heoc5869802011-12-14 00:33:41 +010081 unsigned long ttime_samples;
Tejun Heoc5869802011-12-14 00:33:41 +010082};
83
Jens Axboe22e2c502005-06-27 10:55:12 +020084/*
Jens Axboecc09e292007-04-26 12:53:50 +020085 * Most of our rbtree usage is for sorting with min extraction, so
86 * if we cache the leftmost node we don't have to walk down the tree
87 * to find it. Idea borrowed from Ingo Molnars CFS scheduler. We should
88 * move this into the elevator for the rq sorting as well.
89 */
90struct cfq_rb_root {
91 struct rb_root rb;
92 struct rb_node *left;
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +010093 unsigned count;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -050094 u64 min_vdisktime;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +020095 struct cfq_ttime ttime;
Jens Axboecc09e292007-04-26 12:53:50 +020096};
Shaohua Lif5f2b6c2011-07-12 14:24:55 +020097#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -060098 .ttime = {.last_end_request = ktime_get_ns(),},}
Jens Axboecc09e292007-04-26 12:53:50 +020099
100/*
Jens Axboe6118b702009-06-30 09:34:12 +0200101 * Per process-grouping structure
102 */
103struct cfq_queue {
104 /* reference count */
Shaohua Li30d7b942011-01-07 08:46:59 +0100105 int ref;
Jens Axboe6118b702009-06-30 09:34:12 +0200106 /* various state flags, see below */
107 unsigned int flags;
108 /* parent cfq_data */
109 struct cfq_data *cfqd;
110 /* service_tree member */
111 struct rb_node rb_node;
112 /* service_tree key */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600113 u64 rb_key;
Jens Axboe6118b702009-06-30 09:34:12 +0200114 /* prio tree member */
115 struct rb_node p_node;
116 /* prio tree root we belong to, if any */
117 struct rb_root *p_root;
118 /* sorted list of pending requests */
119 struct rb_root sort_list;
120 /* if fifo isn't expired, next request to serve */
121 struct request *next_rq;
122 /* requests queued in sort_list */
123 int queued[2];
124 /* currently allocated requests */
125 int allocated[2];
126 /* fifo list of requests in sort_list */
127 struct list_head fifo;
128
Vivek Goyaldae739e2009-12-03 12:59:45 -0500129 /* time when queue got scheduled in to dispatch first request. */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600130 u64 dispatch_start;
131 u64 allocated_slice;
132 u64 slice_dispatch;
Vivek Goyaldae739e2009-12-03 12:59:45 -0500133 /* time when first request from queue completed and slice started. */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600134 u64 slice_start;
135 u64 slice_end;
Jan Kara93fdf142016-06-28 09:04:00 +0200136 s64 slice_resid;
Jens Axboe6118b702009-06-30 09:34:12 +0200137
Christoph Hellwig65299a32011-08-23 14:50:29 +0200138 /* pending priority requests */
139 int prio_pending;
Jens Axboe6118b702009-06-30 09:34:12 +0200140 /* number of requests that are on the dispatch list or inside driver */
141 int dispatched;
142
143 /* io prio of this group */
144 unsigned short ioprio, org_ioprio;
Jens Axboeb8269db2016-06-09 15:47:29 -0600145 unsigned short ioprio_class, org_ioprio_class;
Jens Axboe6118b702009-06-30 09:34:12 +0200146
Richard Kennedyc4081ba2010-02-22 13:49:24 +0100147 pid_t pid;
148
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +0100149 u32 seek_history;
Jeff Moyerb2c18e12009-10-23 17:14:49 -0400150 sector_t last_request_pos;
151
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +0100152 struct cfq_rb_root *service_tree;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -0400153 struct cfq_queue *new_cfqq;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500154 struct cfq_group *cfqg;
Vivek Goyalc4e78932010-08-23 12:25:03 +0200155 /* Number of sectors dispatched from queue in single dispatch round */
156 unsigned long nr_sectors;
Jens Axboe6118b702009-06-30 09:34:12 +0200157};
158
/*
 * First index in the service_trees.
 * IDLE is handled separately, so it has negative index
 */
enum wl_class_t {
	BE_WORKLOAD = 0,
	RT_WORKLOAD = 1,
	IDLE_WORKLOAD = 2,
	CFQ_PRIO_NR,
};

/*
 * Second index in the service_trees.
 */
enum wl_type_t {
	ASYNC_WORKLOAD = 0,
	SYNC_NOIDLE_WORKLOAD = 1,
	SYNC_WORKLOAD = 2
};
178
/* per-cfq_group statistics, exported through blkcg */
struct cfqg_stats {
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	/* number of ios merged */
	struct blkg_rwstat		merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat		service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat		wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat		queued;
	/* total disk time and nr sectors dispatched by this group */
	struct blkg_stat		time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* time not charged to this cgroup */
	struct blkg_stat		unaccounted_time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat		avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat		avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat		dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat		group_wait_time;
	/* time spent idling for this blkcg_gq */
	struct blkg_stat		idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat		empty_time;
	/* fields after this shouldn't be cleared on stat reset */
	uint64_t			start_group_wait_time;
	uint64_t			start_idle_time;
	uint64_t			start_empty_time;
	uint16_t			flags;
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
#endif	/* CONFIG_CFQ_GROUP_IOSCHED */
};
214
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200215/* Per-cgroup data */
216struct cfq_group_data {
217 /* must be the first member */
Tejun Heo81437642015-08-18 14:55:15 -0700218 struct blkcg_policy_data cpd;
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200219
220 unsigned int weight;
221 unsigned int leaf_weight;
222};
223
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500224/* This is per cgroup per device grouping structure */
225struct cfq_group {
Tejun Heof95a04a2012-04-16 13:57:26 -0700226 /* must be the first member */
227 struct blkg_policy_data pd;
228
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500229 /* group service_tree member */
230 struct rb_node rb_node;
231
232 /* group service_tree key */
233 u64 vdisktime;
Tejun Heoe71357e2013-01-09 08:05:10 -0800234
235 /*
Tejun Heo7918ffb2013-01-09 08:05:11 -0800236 * The number of active cfqgs and sum of their weights under this
237 * cfqg. This covers this cfqg's leaf_weight and all children's
238 * weights, but does not cover weights of further descendants.
239 *
240 * If a cfqg is on the service tree, it's active. An active cfqg
241 * also activates its parent and contributes to the children_weight
242 * of the parent.
243 */
244 int nr_active;
245 unsigned int children_weight;
246
247 /*
Tejun Heo1d3650f2013-01-09 08:05:11 -0800248 * vfraction is the fraction of vdisktime that the tasks in this
249 * cfqg are entitled to. This is determined by compounding the
250 * ratios walking up from this cfqg to the root.
251 *
252 * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all
253 * vfractions on a service tree is approximately 1. The sum may
254 * deviate a bit due to rounding errors and fluctuations caused by
255 * cfqgs entering and leaving the service tree.
256 */
257 unsigned int vfraction;
258
259 /*
Tejun Heoe71357e2013-01-09 08:05:10 -0800260 * There are two weights - (internal) weight is the weight of this
261 * cfqg against the sibling cfqgs. leaf_weight is the wight of
262 * this cfqg against the child cfqgs. For the root cfqg, both
263 * weights are kept in sync for backward compatibility.
264 */
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500265 unsigned int weight;
Justin TerAvest8184f932011-03-17 16:12:36 +0100266 unsigned int new_weight;
Tejun Heo3381cb82012-04-01 14:38:44 -0700267 unsigned int dev_weight;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500268
Tejun Heoe71357e2013-01-09 08:05:10 -0800269 unsigned int leaf_weight;
270 unsigned int new_leaf_weight;
271 unsigned int dev_leaf_weight;
272
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500273 /* number of cfqq currently on this group */
274 int nr_cfqq;
275
Jens Axboe22e2c502005-06-27 10:55:12 +0200276 /*
Kyungmin Park4495a7d2011-05-31 10:04:09 +0200277 * Per group busy queues average. Useful for workload slice calc. We
Vivek Goyalb4627322010-10-22 09:48:43 +0200278 * create the array for each prio class but at run time it is used
279 * only for RT and BE class and slot for IDLE class remains unused.
280 * This is primarily done to avoid confusion and a gcc warning.
281 */
282 unsigned int busy_queues_avg[CFQ_PRIO_NR];
283 /*
284 * rr lists of queues with requests. We maintain service trees for
285 * RT and BE classes. These trees are subdivided in subclasses
286 * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE
287 * class there is no subclassification and all the cfq queues go on
288 * a single tree service_tree_idle.
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100289 * Counts are embedded in the cfq_rb_root
Jens Axboe22e2c502005-06-27 10:55:12 +0200290 */
Corrado Zoccolo718eee02009-10-26 22:45:29 +0100291 struct cfq_rb_root service_trees[2][3];
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100292 struct cfq_rb_root service_tree_idle;
Vivek Goyaldae739e2009-12-03 12:59:45 -0500293
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600294 u64 saved_wl_slice;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -0400295 enum wl_type_t saved_wl_type;
296 enum wl_class_t saved_wl_class;
Tejun Heo4eef3042012-03-05 13:15:18 -0800297
Vivek Goyal80bdf0c2010-08-23 12:24:26 +0200298 /* number of requests that are on the dispatch list or inside driver */
299 int dispatched;
Shaohua Li7700fc42011-07-12 14:24:56 +0200300 struct cfq_ttime ttime;
Tejun Heo0b399202013-01-09 08:05:13 -0800301 struct cfqg_stats stats; /* stats for this cfqg */
Tejun Heo60a83702015-08-18 14:55:05 -0700302
303 /* async queue for each priority case */
304 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
305 struct cfq_queue *async_idle_cfqq;
306
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500307};
308
Tejun Heoc5869802011-12-14 00:33:41 +0100309struct cfq_io_cq {
310 struct io_cq icq; /* must be the first member */
311 struct cfq_queue *cfqq[2];
312 struct cfq_ttime ttime;
Tejun Heo598971b2012-03-19 15:10:58 -0700313 int ioprio; /* the current ioprio */
314#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heof4da8072014-09-08 08:15:20 +0900315 uint64_t blkcg_serial_nr; /* the current blkcg serial */
Tejun Heo598971b2012-03-19 15:10:58 -0700316#endif
Tejun Heoc5869802011-12-14 00:33:41 +0100317};
318
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500319/*
320 * Per block device queue structure
321 */
322struct cfq_data {
323 struct request_queue *queue;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500324 /* Root service tree for cfq_groups */
325 struct cfq_rb_root grp_service_tree;
Tejun Heof51b8022012-03-05 13:15:05 -0800326 struct cfq_group *root_group;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500327
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100328 /*
329 * The priority currently being served
330 */
Vivek Goyal4d2ceea2012-10-03 16:56:57 -0400331 enum wl_class_t serving_wl_class;
332 enum wl_type_t serving_wl_type;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600333 u64 workload_expires;
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500334 struct cfq_group *serving_group;
Jens Axboea36e71f2009-04-15 12:15:11 +0200335
336 /*
337 * Each priority tree is sorted by next_request position. These
338 * trees are used when determining if two or more queues are
339 * interleaving requests (see cfq_close_cooperator).
340 */
341 struct rb_root prio_trees[CFQ_PRIO_LISTS];
342
Jens Axboe22e2c502005-06-27 10:55:12 +0200343 unsigned int busy_queues;
Shaohua Lief8a41d2011-03-07 09:26:29 +0100344 unsigned int busy_sync_queues;
Jens Axboe22e2c502005-06-27 10:55:12 +0200345
Corrado Zoccolo53c583d2010-02-28 19:45:05 +0100346 int rq_in_driver;
347 int rq_in_flight[2];
Aaron Carroll45333d52008-08-26 15:52:36 +0200348
349 /*
350 * queue-depth detection
351 */
352 int rq_queued;
Jens Axboe25776e32006-06-01 10:12:26 +0200353 int hw_tag;
Corrado Zoccoloe459dd02009-11-26 10:02:57 +0100354 /*
355 * hw_tag can be
356 * -1 => indeterminate, (cfq will behave as if NCQ is present, to allow better detection)
357 * 1 => NCQ is present (hw_tag_est_depth is the estimated max depth)
358 * 0 => no NCQ
359 */
360 int hw_tag_est_depth;
361 unsigned int hw_tag_samples;
Jens Axboe22e2c502005-06-27 10:55:12 +0200362
363 /*
Jens Axboe22e2c502005-06-27 10:55:12 +0200364 * idle window management
365 */
Jan Kara91148322016-06-08 15:11:39 +0200366 struct hrtimer idle_slice_timer;
Jens Axboe23e018a2009-10-05 08:52:35 +0200367 struct work_struct unplug_work;
Jens Axboe22e2c502005-06-27 10:55:12 +0200368
369 struct cfq_queue *active_queue;
Tejun Heoc5869802011-12-14 00:33:41 +0100370 struct cfq_io_cq *active_cic;
Jens Axboe22e2c502005-06-27 10:55:12 +0200371
Jens Axboe6d048f52007-04-25 12:44:27 +0200372 sector_t last_position;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 /*
375 * tunables, see top of file
376 */
377 unsigned int cfq_quantum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378 unsigned int cfq_back_penalty;
379 unsigned int cfq_back_max;
Jens Axboe22e2c502005-06-27 10:55:12 +0200380 unsigned int cfq_slice_async_rq;
Jens Axboe963b72f2009-10-03 19:42:18 +0200381 unsigned int cfq_latency;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600382 u64 cfq_fifo_expire[2];
383 u64 cfq_slice[2];
384 u64 cfq_slice_idle;
385 u64 cfq_group_idle;
386 u64 cfq_target_latency;
Al Virod9ff4182006-03-18 13:51:22 -0500387
Jens Axboe6118b702009-06-30 09:34:12 +0200388 /*
389 * Fallback dummy cfqq for extreme OOM conditions
390 */
391 struct cfq_queue oom_cfqq;
Vivek Goyal365722b2009-10-03 15:21:27 +0200392
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600393 u64 last_delayed_sync;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394};
395
Vivek Goyal25fb5162009-12-03 12:59:46 -0500396static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
Tejun Heo60a83702015-08-18 14:55:05 -0700397static void cfq_put_queue(struct cfq_queue *cfqq);
Vivek Goyal25fb5162009-12-03 12:59:46 -0500398
Vivek Goyal34b98d02012-10-03 16:56:58 -0400399static struct cfq_rb_root *st_for(struct cfq_group *cfqg,
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400400 enum wl_class_t class,
Vivek Goyal65b32a52009-12-16 17:52:59 -0500401 enum wl_type_t type)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100402{
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -0500403 if (!cfqg)
404 return NULL;
405
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400406 if (class == IDLE_WORKLOAD)
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500407 return &cfqg->service_tree_idle;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100408
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400409 return &cfqg->service_trees[class][type];
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100410}
411
/* bit positions for cfq_queue->flags */
enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
	CFQ_CFQQ_FLAG_must_dispatch,	/* must be allowed a dispatch */
	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
	CFQ_CFQQ_FLAG_coop,		/* cfqq is shared */
	CFQ_CFQQ_FLAG_split_coop,	/* shared cfqq will be splitted */
	CFQ_CFQQ_FLAG_deep,		/* sync cfqq experienced large depth */
	CFQ_CFQQ_FLAG_wait_busy,	/* Waiting for next request */
};
427
428#define CFQ_CFQQ_FNS(name) \
429static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
430{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100431 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
Jens Axboe3b181522005-06-27 10:56:24 +0200432} \
433static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
434{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100435 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
Jens Axboe3b181522005-06-27 10:56:24 +0200436} \
437static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
438{ \
Jens Axboefe094d92008-01-31 13:08:54 +0100439 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
Jens Axboe3b181522005-06-27 10:56:24 +0200440}
441
442CFQ_CFQQ_FNS(on_rr);
443CFQ_CFQQ_FNS(wait_request);
Jens Axboeb0291952009-04-07 11:38:31 +0200444CFQ_CFQQ_FNS(must_dispatch);
Jens Axboe3b181522005-06-27 10:56:24 +0200445CFQ_CFQQ_FNS(must_alloc_slice);
Jens Axboe3b181522005-06-27 10:56:24 +0200446CFQ_CFQQ_FNS(fifo_expire);
447CFQ_CFQQ_FNS(idle_window);
448CFQ_CFQQ_FNS(prio_changed);
Jens Axboe44f7c162007-01-19 11:51:58 +1100449CFQ_CFQQ_FNS(slice_new);
Vasily Tarasov91fac312007-04-25 12:29:51 +0200450CFQ_CFQQ_FNS(sync);
Jens Axboea36e71f2009-04-15 12:15:11 +0200451CFQ_CFQQ_FNS(coop);
Shaohua Liae54abe2010-02-05 13:11:45 +0100452CFQ_CFQQ_FNS(split_coop);
Corrado Zoccolo76280af2009-11-26 10:02:58 +0100453CFQ_CFQQ_FNS(deep);
Vivek Goyalf75edf22009-12-03 12:59:53 -0500454CFQ_CFQQ_FNS(wait_busy);
Jens Axboe3b181522005-06-27 10:56:24 +0200455#undef CFQ_CFQQ_FNS
456
Tejun Heo629ed0b2012-04-01 14:38:44 -0700457#if defined(CONFIG_CFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700458
Tejun Heo155fead2012-04-01 14:38:44 -0700459/* cfqg stats flags */
460enum cfqg_stats_flags {
461 CFQG_stats_waiting = 0,
462 CFQG_stats_idling,
463 CFQG_stats_empty,
Tejun Heo629ed0b2012-04-01 14:38:44 -0700464};
465
Tejun Heo155fead2012-04-01 14:38:44 -0700466#define CFQG_FLAG_FNS(name) \
467static inline void cfqg_stats_mark_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700468{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700469 stats->flags |= (1 << CFQG_stats_##name); \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700470} \
Tejun Heo155fead2012-04-01 14:38:44 -0700471static inline void cfqg_stats_clear_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700472{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700473 stats->flags &= ~(1 << CFQG_stats_##name); \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700474} \
Tejun Heo155fead2012-04-01 14:38:44 -0700475static inline int cfqg_stats_##name(struct cfqg_stats *stats) \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700476{ \
Tejun Heo155fead2012-04-01 14:38:44 -0700477 return (stats->flags & (1 << CFQG_stats_##name)) != 0; \
Tejun Heo629ed0b2012-04-01 14:38:44 -0700478} \
479
Tejun Heo155fead2012-04-01 14:38:44 -0700480CFQG_FLAG_FNS(waiting)
481CFQG_FLAG_FNS(idling)
482CFQG_FLAG_FNS(empty)
483#undef CFQG_FLAG_FNS
Tejun Heo629ed0b2012-04-01 14:38:44 -0700484
485/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700486static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700487{
488 unsigned long long now;
489
Tejun Heo155fead2012-04-01 14:38:44 -0700490 if (!cfqg_stats_waiting(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700491 return;
492
493 now = sched_clock();
494 if (time_after64(now, stats->start_group_wait_time))
495 blkg_stat_add(&stats->group_wait_time,
496 now - stats->start_group_wait_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700497 cfqg_stats_clear_waiting(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700498}
499
500/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700501static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
502 struct cfq_group *curr_cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700503{
Tejun Heo155fead2012-04-01 14:38:44 -0700504 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700505
Tejun Heo155fead2012-04-01 14:38:44 -0700506 if (cfqg_stats_waiting(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700507 return;
Tejun Heo155fead2012-04-01 14:38:44 -0700508 if (cfqg == curr_cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700509 return;
Tejun Heo155fead2012-04-01 14:38:44 -0700510 stats->start_group_wait_time = sched_clock();
511 cfqg_stats_mark_waiting(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700512}
513
514/* This should be called with the queue_lock held. */
Tejun Heo155fead2012-04-01 14:38:44 -0700515static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700516{
517 unsigned long long now;
518
Tejun Heo155fead2012-04-01 14:38:44 -0700519 if (!cfqg_stats_empty(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700520 return;
521
522 now = sched_clock();
523 if (time_after64(now, stats->start_empty_time))
524 blkg_stat_add(&stats->empty_time,
525 now - stats->start_empty_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700526 cfqg_stats_clear_empty(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700527}
528
Tejun Heo155fead2012-04-01 14:38:44 -0700529static void cfqg_stats_update_dequeue(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700530{
Tejun Heo155fead2012-04-01 14:38:44 -0700531 blkg_stat_add(&cfqg->stats.dequeue, 1);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700532}
533
Tejun Heo155fead2012-04-01 14:38:44 -0700534static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700535{
Tejun Heo155fead2012-04-01 14:38:44 -0700536 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700537
Tejun Heo4d5e80a2013-01-09 08:05:12 -0800538 if (blkg_rwstat_total(&stats->queued))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700539 return;
540
541 /*
542 * group is already marked empty. This can happen if cfqq got new
543 * request in parent group and moved to this group while being added
544 * to service tree. Just ignore the event and move on.
545 */
Tejun Heo155fead2012-04-01 14:38:44 -0700546 if (cfqg_stats_empty(stats))
Tejun Heo629ed0b2012-04-01 14:38:44 -0700547 return;
548
549 stats->start_empty_time = sched_clock();
Tejun Heo155fead2012-04-01 14:38:44 -0700550 cfqg_stats_mark_empty(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700551}
552
Tejun Heo155fead2012-04-01 14:38:44 -0700553static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700554{
Tejun Heo155fead2012-04-01 14:38:44 -0700555 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700556
Tejun Heo155fead2012-04-01 14:38:44 -0700557 if (cfqg_stats_idling(stats)) {
Tejun Heo629ed0b2012-04-01 14:38:44 -0700558 unsigned long long now = sched_clock();
559
560 if (time_after64(now, stats->start_idle_time))
561 blkg_stat_add(&stats->idle_time,
562 now - stats->start_idle_time);
Tejun Heo155fead2012-04-01 14:38:44 -0700563 cfqg_stats_clear_idling(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700564 }
565}
566
Tejun Heo155fead2012-04-01 14:38:44 -0700567static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700568{
Tejun Heo155fead2012-04-01 14:38:44 -0700569 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700570
Tejun Heo155fead2012-04-01 14:38:44 -0700571 BUG_ON(cfqg_stats_idling(stats));
Tejun Heo629ed0b2012-04-01 14:38:44 -0700572
573 stats->start_idle_time = sched_clock();
Tejun Heo155fead2012-04-01 14:38:44 -0700574 cfqg_stats_mark_idling(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700575}
576
Tejun Heo155fead2012-04-01 14:38:44 -0700577static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg)
Tejun Heo629ed0b2012-04-01 14:38:44 -0700578{
Tejun Heo155fead2012-04-01 14:38:44 -0700579 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700580
581 blkg_stat_add(&stats->avg_queue_size_sum,
Tejun Heo4d5e80a2013-01-09 08:05:12 -0800582 blkg_rwstat_total(&stats->queued));
Tejun Heo629ed0b2012-04-01 14:38:44 -0700583 blkg_stat_add(&stats->avg_queue_size_samples, 1);
Tejun Heo155fead2012-04-01 14:38:44 -0700584 cfqg_stats_update_group_wait_time(stats);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700585}
586
587#else /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
588
Tejun Heof48ec1d2012-04-13 13:11:25 -0700589static inline void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg, struct cfq_group *curr_cfqg) { }
590static inline void cfqg_stats_end_empty_time(struct cfqg_stats *stats) { }
591static inline void cfqg_stats_update_dequeue(struct cfq_group *cfqg) { }
592static inline void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { }
593static inline void cfqg_stats_update_idle_time(struct cfq_group *cfqg) { }
594static inline void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg) { }
595static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
Tejun Heo629ed0b2012-04-01 14:38:44 -0700596
597#endif /* CONFIG_CFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
598
599#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo2ce4d502012-04-01 14:38:43 -0700600
Jens Axboe4ceab712015-06-19 10:13:01 -0600601static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
602{
603 return pd ? container_of(pd, struct cfq_group, pd) : NULL;
604}
605
606static struct cfq_group_data
607*cpd_to_cfqgd(struct blkcg_policy_data *cpd)
608{
Tejun Heo81437642015-08-18 14:55:15 -0700609 return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL;
Jens Axboe4ceab712015-06-19 10:13:01 -0600610}
611
612static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
613{
614 return pd_to_blkg(&cfqg->pd);
615}
616
Tejun Heoffea73f2012-06-04 10:02:29 +0200617static struct blkcg_policy blkcg_policy_cfq;
618
619static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
620{
621 return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
622}
623
Arianna Avanzinie48453c2015-06-05 23:38:42 +0200624static struct cfq_group_data *blkcg_to_cfqgd(struct blkcg *blkcg)
625{
626 return cpd_to_cfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_cfq));
627}
628
Tejun Heod02f7aa2013-01-09 08:05:11 -0800629static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
Tejun Heo7918ffb2013-01-09 08:05:11 -0800630{
Tejun Heod02f7aa2013-01-09 08:05:11 -0800631 struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
Tejun Heo7918ffb2013-01-09 08:05:11 -0800632
Tejun Heod02f7aa2013-01-09 08:05:11 -0800633 return pblkg ? blkg_to_cfqg(pblkg) : NULL;
Tejun Heo7918ffb2013-01-09 08:05:11 -0800634}
635
Jan Kara3984aa52016-01-12 16:24:19 +0100636static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
637 struct cfq_group *ancestor)
638{
639 return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
640 cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
641}
642
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100643static inline void cfqg_get(struct cfq_group *cfqg)
644{
645 return blkg_get(cfqg_to_blkg(cfqg));
646}
647
/* Drop the reference on the blkg backing @cfqg. */
static inline void cfqg_put(struct cfq_group *cfqg)
{
	blkg_put(cfqg_to_blkg(cfqg));
}
652
Tejun Heo54e7ed12012-04-16 13:57:23 -0700653#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) do { \
654 char __pbuf[128]; \
655 \
656 blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \
Vivek Goyalb226e5c2012-10-03 16:57:01 -0400657 blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \
658 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
659 cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
Tejun Heo54e7ed12012-04-16 13:57:23 -0700660 __pbuf, ##args); \
661} while (0)
Vivek Goyal2868ef72009-12-03 12:59:48 -0500662
Tejun Heo54e7ed12012-04-16 13:57:23 -0700663#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do { \
664 char __pbuf[128]; \
665 \
666 blkg_path(cfqg_to_blkg(cfqg), __pbuf, sizeof(__pbuf)); \
667 blk_add_trace_msg((cfqd)->queue, "%s " fmt, __pbuf, ##args); \
668} while (0)
Vivek Goyal2868ef72009-12-03 12:59:48 -0500669
Tejun Heo155fead2012-04-01 14:38:44 -0700670static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600671 struct cfq_group *curr_cfqg,
672 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700673{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600674 blkg_rwstat_add(&cfqg->stats.queued, op, 1);
Tejun Heo155fead2012-04-01 14:38:44 -0700675 cfqg_stats_end_empty_time(&cfqg->stats);
676 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700677}
678
Tejun Heo155fead2012-04-01 14:38:44 -0700679static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600680 uint64_t time, unsigned long unaccounted_time)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700681{
Tejun Heo155fead2012-04-01 14:38:44 -0700682 blkg_stat_add(&cfqg->stats.time, time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700683#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heo155fead2012-04-01 14:38:44 -0700684 blkg_stat_add(&cfqg->stats.unaccounted_time, unaccounted_time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700685#endif
Tejun Heo2ce4d502012-04-01 14:38:43 -0700686}
687
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600688static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
689 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700690{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600691 blkg_rwstat_add(&cfqg->stats.queued, op, -1);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700692}
693
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600694static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
695 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700696{
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600697 blkg_rwstat_add(&cfqg->stats.merged, op, 1);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700698}
699
Tejun Heo155fead2012-04-01 14:38:44 -0700700static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600701 uint64_t start_time, uint64_t io_start_time,
702 unsigned int op)
Tejun Heo2ce4d502012-04-01 14:38:43 -0700703{
Tejun Heo155fead2012-04-01 14:38:44 -0700704 struct cfqg_stats *stats = &cfqg->stats;
Tejun Heo629ed0b2012-04-01 14:38:44 -0700705 unsigned long long now = sched_clock();
Tejun Heo629ed0b2012-04-01 14:38:44 -0700706
707 if (time_after64(now, io_start_time))
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600708 blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
Tejun Heo629ed0b2012-04-01 14:38:44 -0700709 if (time_after64(io_start_time, start_time))
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600710 blkg_rwstat_add(&stats->wait_time, op,
Tejun Heo629ed0b2012-04-01 14:38:44 -0700711 io_start_time - start_time);
Tejun Heo2ce4d502012-04-01 14:38:43 -0700712}
713
Tejun Heo689665a2013-01-09 08:05:13 -0800714/* @stats = 0 */
715static void cfqg_stats_reset(struct cfqg_stats *stats)
Tejun Heo155fead2012-04-01 14:38:44 -0700716{
Tejun Heo155fead2012-04-01 14:38:44 -0700717 /* queued stats shouldn't be cleared */
Tejun Heo155fead2012-04-01 14:38:44 -0700718 blkg_rwstat_reset(&stats->merged);
719 blkg_rwstat_reset(&stats->service_time);
720 blkg_rwstat_reset(&stats->wait_time);
721 blkg_stat_reset(&stats->time);
722#ifdef CONFIG_DEBUG_BLK_CGROUP
723 blkg_stat_reset(&stats->unaccounted_time);
724 blkg_stat_reset(&stats->avg_queue_size_sum);
725 blkg_stat_reset(&stats->avg_queue_size_samples);
726 blkg_stat_reset(&stats->dequeue);
727 blkg_stat_reset(&stats->group_wait_time);
728 blkg_stat_reset(&stats->idle_time);
729 blkg_stat_reset(&stats->empty_time);
730#endif
731}
732
Tejun Heo0b399202013-01-09 08:05:13 -0800733/* @to += @from */
Tejun Heoe6269c42015-08-18 14:55:21 -0700734static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
Tejun Heo0b399202013-01-09 08:05:13 -0800735{
736 /* queued stats shouldn't be cleared */
Tejun Heoe6269c42015-08-18 14:55:21 -0700737 blkg_rwstat_add_aux(&to->merged, &from->merged);
738 blkg_rwstat_add_aux(&to->service_time, &from->service_time);
739 blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
740 blkg_stat_add_aux(&from->time, &from->time);
Tejun Heo0b399202013-01-09 08:05:13 -0800741#ifdef CONFIG_DEBUG_BLK_CGROUP
Tejun Heoe6269c42015-08-18 14:55:21 -0700742 blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
743 blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
744 blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
745 blkg_stat_add_aux(&to->dequeue, &from->dequeue);
746 blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
747 blkg_stat_add_aux(&to->idle_time, &from->idle_time);
748 blkg_stat_add_aux(&to->empty_time, &from->empty_time);
Tejun Heo0b399202013-01-09 08:05:13 -0800749#endif
750}
751
752/*
Tejun Heoe6269c42015-08-18 14:55:21 -0700753 * Transfer @cfqg's stats to its parent's aux counts so that the ancestors'
Tejun Heo0b399202013-01-09 08:05:13 -0800754 * recursive stats can still account for the amount used by this cfqg after
755 * it's gone.
756 */
/*
 * Fold a dying @cfqg's stats into its parent's aux counters, then zero
 * the child's counters.  Must run under the queue lock (asserted), so
 * the parent/child pair cannot change underneath us.  The root group
 * has no parent and is skipped.
 */
static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
{
	struct cfq_group *parent = cfqg_parent(cfqg);

	lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	cfqg_stats_add_aux(&parent->stats, &cfqg->stats);
	cfqg_stats_reset(&cfqg->stats);
}
769
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100770#else /* CONFIG_CFQ_GROUP_IOSCHED */
771
Tejun Heod02f7aa2013-01-09 08:05:11 -0800772static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
Jan Kara3984aa52016-01-12 16:24:19 +0100773static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
774 struct cfq_group *ancestor)
775{
776 return true;
777}
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100778static inline void cfqg_get(struct cfq_group *cfqg) { }
779static inline void cfqg_put(struct cfq_group *cfqg) { }
780
Jens Axboe7b679132008-05-30 12:23:07 +0200781#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
Vivek Goyalb226e5c2012-10-03 16:57:01 -0400782 blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \
783 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
784 cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
785 ##args)
Kyungmin Park4495a7d2011-05-31 10:04:09 +0200786#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100787
Tejun Heo155fead2012-04-01 14:38:44 -0700788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600789 struct cfq_group *curr_cfqg, unsigned int op) { }
Tejun Heo155fead2012-04-01 14:38:44 -0700790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600791 uint64_t time, unsigned long unaccounted_time) { }
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
793 unsigned int op) { }
794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
795 unsigned int op) { }
Tejun Heo155fead2012-04-01 14:38:44 -0700796static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
Christoph Hellwigef295ec2016-10-28 08:48:16 -0600797 uint64_t start_time, uint64_t io_start_time,
798 unsigned int op) { }
Tejun Heo2ce4d502012-04-01 14:38:43 -0700799
Tejun Heoeb7d8c072012-03-23 14:02:53 +0100800#endif /* CONFIG_CFQ_GROUP_IOSCHED */
801
Jens Axboe7b679132008-05-30 12:23:07 +0200802#define cfq_log(cfqd, fmt, args...) \
803 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
804
Vivek Goyal615f0252009-12-03 12:59:39 -0500805/* Traverses through cfq group service trees */
806#define for_each_cfqg_st(cfqg, i, j, st) \
807 for (i = 0; i <= IDLE_WORKLOAD; i++) \
808 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
809 : &cfqg->service_tree_idle; \
810 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
811 (i == IDLE_WORKLOAD && j == 0); \
812 j++, st = i < IDLE_WORKLOAD ? \
813 &cfqg->service_trees[i][j]: NULL) \
814
Shaohua Lif5f2b6c2011-07-12 14:24:55 +0200815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
816 struct cfq_ttime *ttime, bool group_idle)
817{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600818 u64 slice;
Shaohua Lif5f2b6c2011-07-12 14:24:55 +0200819 if (!sample_valid(ttime->ttime_samples))
820 return false;
821 if (group_idle)
822 slice = cfqd->cfq_group_idle;
823 else
824 slice = cfqd->cfq_slice_idle;
825 return ttime->ttime_mean > slice;
826}
Vivek Goyal615f0252009-12-03 12:59:39 -0500827
Vivek Goyal02b35082010-08-23 12:23:53 +0200828static inline bool iops_mode(struct cfq_data *cfqd)
829{
830 /*
831 * If we are not idling on queues and it is a NCQ drive, parallel
832 * execution of requests is on and measuring time is not possible
833 * in most of the cases until and unless we drive shallower queue
834 * depths and that becomes a performance bottleneck. In such cases
835 * switch to start providing fairness in terms of number of IOs.
836 */
837 if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
838 return true;
839 else
840 return false;
841}
842
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400843static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100844{
845 if (cfq_class_idle(cfqq))
846 return IDLE_WORKLOAD;
847 if (cfq_class_rt(cfqq))
848 return RT_WORKLOAD;
849 return BE_WORKLOAD;
850}
851
Corrado Zoccolo718eee02009-10-26 22:45:29 +0100852
853static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
854{
855 if (!cfq_cfqq_sync(cfqq))
856 return ASYNC_WORKLOAD;
857 if (!cfq_cfqq_idle_window(cfqq))
858 return SYNC_NOIDLE_WORKLOAD;
859 return SYNC_WORKLOAD;
860}
861
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400862static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class,
Vivek Goyal58ff82f2009-12-03 12:59:44 -0500863 struct cfq_data *cfqd,
864 struct cfq_group *cfqg)
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100865{
Vivek Goyal3bf10fe2012-10-03 16:56:56 -0400866 if (wl_class == IDLE_WORKLOAD)
Vivek Goyalcdb16e82009-12-03 12:59:38 -0500867 return cfqg->service_tree_idle.count;
868
Vivek Goyal34b98d02012-10-03 16:56:58 -0400869 return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count +
870 cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count +
871 cfqg->service_trees[wl_class][SYNC_WORKLOAD].count;
Corrado Zoccoloc0324a02009-10-27 19:16:03 +0100872}
873
Vivek Goyalf26bd1f2009-12-03 12:59:54 -0500874static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
875 struct cfq_group *cfqg)
876{
Vivek Goyal34b98d02012-10-03 16:56:58 -0400877 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count +
878 cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
Vivek Goyalf26bd1f2009-12-03 12:59:54 -0500879}
880
Jens Axboe165125e2007-07-24 09:28:11 +0200881static void cfq_dispatch_insert(struct request_queue *, struct request *);
Tejun Heo4f85cb92012-03-05 13:15:28 -0800882static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, bool is_sync,
Tejun Heo2da8de02015-08-18 14:55:02 -0700883 struct cfq_io_cq *cic, struct bio *bio);
Vasily Tarasov91fac312007-04-25 12:29:51 +0200884
Tejun Heoc5869802011-12-14 00:33:41 +0100885static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
886{
887 /* cic->icq is the first member, %NULL will convert to %NULL */
888 return container_of(icq, struct cfq_io_cq, icq);
889}
890
Tejun Heo47fdd4c2011-12-14 00:33:42 +0100891static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd,
892 struct io_context *ioc)
893{
894 if (ioc)
895 return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue));
896 return NULL;
897}
898
Tejun Heoc5869802011-12-14 00:33:41 +0100899static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
Vasily Tarasov91fac312007-04-25 12:29:51 +0200900{
Jens Axboea6151c32009-10-07 20:02:57 +0200901 return cic->cfqq[is_sync];
Vasily Tarasov91fac312007-04-25 12:29:51 +0200902}
903
Tejun Heoc5869802011-12-14 00:33:41 +0100904static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
905 bool is_sync)
Vasily Tarasov91fac312007-04-25 12:29:51 +0200906{
Jens Axboea6151c32009-10-07 20:02:57 +0200907 cic->cfqq[is_sync] = cfqq;
Vasily Tarasov91fac312007-04-25 12:29:51 +0200908}
909
Tejun Heoc5869802011-12-14 00:33:41 +0100910static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +0400911{
Tejun Heoc5869802011-12-14 00:33:41 +0100912 return cic->icq.q->elevator->elevator_data;
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +0400913}
914
Vasily Tarasov91fac312007-04-25 12:29:51 +0200915/*
Andrew Morton99f95e52005-06-27 20:14:05 -0700916 * scheduler run of queue, if there are requests pending and no one in the
917 * driver that will restart queueing
918 */
Jens Axboe23e018a2009-10-05 08:52:35 +0200919static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
Andrew Morton99f95e52005-06-27 20:14:05 -0700920{
Jens Axboe7b679132008-05-30 12:23:07 +0200921 if (cfqd->busy_queues) {
922 cfq_log(cfqd, "schedule dispatch");
Jens Axboe59c3d452014-04-08 09:15:35 -0600923 kblockd_schedule_work(&cfqd->unplug_work);
Jens Axboe7b679132008-05-30 12:23:07 +0200924 }
Andrew Morton99f95e52005-06-27 20:14:05 -0700925}
926
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927/*
Jens Axboe44f7c162007-01-19 11:51:58 +1100928 * Scale schedule slice based on io priority. Use the sync time slice only
929 * if a queue is marked sync and has sync io queued. A sync queue with async
930 * io only, should not get full sync slice length.
931 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600932static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
Jens Axboed9e76202007-04-20 14:27:50 +0200933 unsigned short prio)
934{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600935 u64 base_slice = cfqd->cfq_slice[sync];
936 u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
Jens Axboed9e76202007-04-20 14:27:50 +0200937
938 WARN_ON(prio >= IOPRIO_BE_NR);
939
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600940 return base_slice + (slice * (4 - prio));
Jens Axboed9e76202007-04-20 14:27:50 +0200941}
942
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600943static inline u64
Jens Axboe44f7c162007-01-19 11:51:58 +1100944cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
945{
Jens Axboed9e76202007-04-20 14:27:50 +0200946 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
Jens Axboe44f7c162007-01-19 11:51:58 +1100947}
948
Tejun Heo1d3650f2013-01-09 08:05:11 -0800949/**
950 * cfqg_scale_charge - scale disk time charge according to cfqg weight
951 * @charge: disk time being charged
952 * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT
953 *
954 * Scale @charge according to @vfraction, which is in range (0, 1]. The
955 * scaling is inversely proportional.
956 *
957 * scaled = charge / vfraction
958 *
959 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
960 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600961static inline u64 cfqg_scale_charge(u64 charge,
Tejun Heo1d3650f2013-01-09 08:05:11 -0800962 unsigned int vfraction)
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500963{
Tejun Heo1d3650f2013-01-09 08:05:11 -0800964 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500965
Tejun Heo1d3650f2013-01-09 08:05:11 -0800966 /* charge / vfraction */
967 c <<= CFQ_SERVICE_SHIFT;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -0600968 return div_u64(c, vfraction);
Vivek Goyal25bc6b02009-12-03 12:59:43 -0500969}
970
971static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
972{
973 s64 delta = (s64)(vdisktime - min_vdisktime);
974 if (delta > 0)
975 min_vdisktime = vdisktime;
976
977 return min_vdisktime;
978}
979
980static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
981{
982 s64 delta = (s64)(vdisktime - min_vdisktime);
983 if (delta < 0)
984 min_vdisktime = vdisktime;
985
986 return min_vdisktime;
987}
988
/*
 * Advance the service tree's min_vdisktime to the leftmost (smallest
 * vdisktime) group's value; it only moves forward, never backward.
 */
static void update_min_vdisktime(struct cfq_rb_root *st)
{
	struct cfq_group *cfqg;

	if (st->left) {
		cfqg = rb_entry_cfqg(st->left);
		st->min_vdisktime = max_vdisktime(st->min_vdisktime,
						  cfqg->vdisktime);
	}
}
999
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001000/*
1001 * get averaged number of queues of RT/BE priority.
1002 * average is updated, with a formula that gives more weight to higher numbers,
1003 * to quickly follows sudden increases and decrease slowly
1004 */
1005
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001006static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
1007 struct cfq_group *cfqg, bool rt)
Jens Axboe5869619c2009-10-28 09:27:07 +01001008{
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001009 unsigned min_q, max_q;
1010 unsigned mult = cfq_hist_divisor - 1;
1011 unsigned round = cfq_hist_divisor / 2;
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001012 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001013
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001014 min_q = min(cfqg->busy_queues_avg[rt], busy);
1015 max_q = max(cfqg->busy_queues_avg[rt], busy);
1016 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001017 cfq_hist_divisor;
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001018 return cfqg->busy_queues_avg[rt];
1019}
1020
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001021static inline u64
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001022cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
1023{
Tejun Heo41cad6a2013-01-09 08:05:11 -08001024 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001025}
1026
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001027static inline u64
Vivek Goyalba5bd522011-01-19 08:25:02 -07001028cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Jens Axboe44f7c162007-01-19 11:51:58 +11001029{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001030 u64 slice = cfq_prio_to_slice(cfqd, cfqq);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001031 if (cfqd->cfq_latency) {
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001032 /*
1033 * interested queues (we consider only the ones with the same
1034 * priority class in the cfq group)
1035 */
1036 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
1037 cfq_class_rt(cfqq));
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001038 u64 sync_slice = cfqd->cfq_slice[1];
1039 u64 expect_latency = sync_slice * iq;
1040 u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
Vivek Goyal58ff82f2009-12-03 12:59:44 -05001041
1042 if (expect_latency > group_slice) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001043 u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
1044 u64 low_slice;
1045
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001046 /* scale low_slice according to IO priority
1047 * and sync vs async */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001048 low_slice = div64_u64(base_low_slice*slice, sync_slice);
1049 low_slice = min(slice, low_slice);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001050 /* the adapted slice value is scaled to fit all iqs
1051 * into the target latency */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001052 slice = div64_u64(slice*group_slice, expect_latency);
1053 slice = max(slice, low_slice);
Corrado Zoccolo5db5d642009-10-26 22:44:04 +01001054 }
1055 }
Shaohua Lic553f8e2011-01-14 08:41:03 +01001056 return slice;
1057}
1058
/*
 * Start a new time slice for @cfqq: record start/end timestamps (ns)
 * and the length that was allocated, then trace it.
 */
static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
	u64 now = ktime_get_ns();

	cfqq->slice_start = now;
	cfqq->slice_end = now + slice;
	cfqq->allocated_slice = slice;
	cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
}
1070
1071/*
1072 * We need to wrap this check in cfq_cfqq_slice_new(), since ->slice_end
1073 * isn't valid until the first request from the dispatch is activated
1074 * and the slice time set.
1075 */
Jens Axboea6151c32009-10-07 20:02:57 +02001076static inline bool cfq_slice_used(struct cfq_queue *cfqq)
Jens Axboe44f7c162007-01-19 11:51:58 +11001077{
1078 if (cfq_cfqq_slice_new(cfqq))
Shaohua Lic1e44752010-11-08 15:01:02 +01001079 return false;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001080 if (ktime_get_ns() < cfqq->slice_end)
Shaohua Lic1e44752010-11-08 15:01:02 +01001081 return false;
Jens Axboe44f7c162007-01-19 11:51:58 +11001082
Shaohua Lic1e44752010-11-08 15:01:02 +01001083 return true;
Jens Axboe44f7c162007-01-19 11:51:58 +11001084}
1085
1086/*
Jens Axboe5e705372006-07-13 12:39:25 +02001087 * Lifted from AS - choose which of rq1 and rq2 that is best served now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 * We choose the request that is closest to the head right now. Distance
Andreas Mohre8a99052006-03-28 08:59:49 +02001089 * behind the head is penalized and only allowed to a certain extent.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 */
Jens Axboe5e705372006-07-13 12:39:25 +02001091static struct request *
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001092cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001094 sector_t s1, s2, d1 = 0, d2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 unsigned long back_max;
Andreas Mohre8a99052006-03-28 08:59:49 +02001096#define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */
1097#define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */
1098 unsigned wrap = 0; /* bit mask: requests behind the disk head? */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099
Jens Axboe5e705372006-07-13 12:39:25 +02001100 if (rq1 == NULL || rq1 == rq2)
1101 return rq2;
1102 if (rq2 == NULL)
1103 return rq1;
Jens Axboe9c2c38a2005-08-24 14:57:54 +02001104
Namhyung Kim229836b2011-05-24 10:23:21 +02001105 if (rq_is_sync(rq1) != rq_is_sync(rq2))
1106 return rq_is_sync(rq1) ? rq1 : rq2;
1107
Christoph Hellwig65299a32011-08-23 14:50:29 +02001108 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
1109 return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
Jens Axboeb53d1ed2011-08-19 08:34:48 +02001110
Tejun Heo83096eb2009-05-07 22:24:39 +09001111 s1 = blk_rq_pos(rq1);
1112 s2 = blk_rq_pos(rq2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 /*
1115 * by definition, 1KiB is 2 sectors
1116 */
1117 back_max = cfqd->cfq_back_max * 2;
1118
1119 /*
1120 * Strict one way elevator _except_ in the case where we allow
1121 * short backward seeks which are biased as twice the cost of a
1122 * similar forward seek.
1123 */
1124 if (s1 >= last)
1125 d1 = s1 - last;
1126 else if (s1 + back_max >= last)
1127 d1 = (last - s1) * cfqd->cfq_back_penalty;
1128 else
Andreas Mohre8a99052006-03-28 08:59:49 +02001129 wrap |= CFQ_RQ1_WRAP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
1131 if (s2 >= last)
1132 d2 = s2 - last;
1133 else if (s2 + back_max >= last)
1134 d2 = (last - s2) * cfqd->cfq_back_penalty;
1135 else
Andreas Mohre8a99052006-03-28 08:59:49 +02001136 wrap |= CFQ_RQ2_WRAP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137
1138 /* Found required data */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
Andreas Mohre8a99052006-03-28 08:59:49 +02001140 /*
1141 * By doing switch() on the bit mask "wrap" we avoid having to
1142 * check two variables for all permutations: --> faster!
1143 */
1144 switch (wrap) {
Jens Axboe5e705372006-07-13 12:39:25 +02001145 case 0: /* common case for CFQ: rq1 and rq2 not wrapped */
Andreas Mohre8a99052006-03-28 08:59:49 +02001146 if (d1 < d2)
Jens Axboe5e705372006-07-13 12:39:25 +02001147 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001148 else if (d2 < d1)
Jens Axboe5e705372006-07-13 12:39:25 +02001149 return rq2;
Andreas Mohre8a99052006-03-28 08:59:49 +02001150 else {
1151 if (s1 >= s2)
Jens Axboe5e705372006-07-13 12:39:25 +02001152 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001153 else
Jens Axboe5e705372006-07-13 12:39:25 +02001154 return rq2;
Andreas Mohre8a99052006-03-28 08:59:49 +02001155 }
1156
1157 case CFQ_RQ2_WRAP:
Jens Axboe5e705372006-07-13 12:39:25 +02001158 return rq1;
Andreas Mohre8a99052006-03-28 08:59:49 +02001159 case CFQ_RQ1_WRAP:
Jens Axboe5e705372006-07-13 12:39:25 +02001160 return rq2;
1161 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */
Andreas Mohre8a99052006-03-28 08:59:49 +02001162 default:
1163 /*
1164 * Since both rqs are wrapped,
1165 * start with the one that's further behind head
1166 * (--> only *one* back seek required),
1167 * since back seek takes more time than forward.
1168 */
1169 if (s1 <= s2)
Jens Axboe5e705372006-07-13 12:39:25 +02001170 return rq1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 else
Jens Axboe5e705372006-07-13 12:39:25 +02001172 return rq2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 }
1174}
1175
Jens Axboe498d3aa22007-04-26 12:54:48 +02001176/*
1177 * The below is leftmost cache rbtree addon
1178 */
Jens Axboe08717142008-01-28 11:38:15 +01001179static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
Jens Axboecc09e292007-04-26 12:53:50 +02001180{
Vivek Goyal615f0252009-12-03 12:59:39 -05001181 /* Service tree is empty */
1182 if (!root->count)
1183 return NULL;
1184
Jens Axboecc09e292007-04-26 12:53:50 +02001185 if (!root->left)
1186 root->left = rb_first(&root->rb);
1187
Jens Axboe08717142008-01-28 11:38:15 +01001188 if (root->left)
1189 return rb_entry(root->left, struct cfq_queue, rb_node);
1190
1191 return NULL;
Jens Axboecc09e292007-04-26 12:53:50 +02001192}
1193
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001194static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
1195{
1196 if (!root->left)
1197 root->left = rb_first(&root->rb);
1198
1199 if (root->left)
1200 return rb_entry_cfqg(root->left);
1201
1202 return NULL;
1203}
1204
Jens Axboea36e71f2009-04-15 12:15:11 +02001205static void rb_erase_init(struct rb_node *n, struct rb_root *root)
1206{
1207 rb_erase(n, root);
1208 RB_CLEAR_NODE(n);
1209}
1210
Jens Axboecc09e292007-04-26 12:53:50 +02001211static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
1212{
1213 if (root->left == n)
1214 root->left = NULL;
Jens Axboea36e71f2009-04-15 12:15:11 +02001215 rb_erase_init(n, &root->rb);
Corrado Zoccoloaa6f6a32009-10-26 22:44:33 +01001216 --root->count;
Jens Axboecc09e292007-04-26 12:53:50 +02001217}
1218
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219/*
1220 * would be nice to take fifo expire time into account as well
1221 */
Jens Axboe5e705372006-07-13 12:39:25 +02001222static struct request *
1223cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1224 struct request *last)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225{
Jens Axboe21183b02006-07-13 12:33:14 +02001226 struct rb_node *rbnext = rb_next(&last->rb_node);
1227 struct rb_node *rbprev = rb_prev(&last->rb_node);
Jens Axboe5e705372006-07-13 12:39:25 +02001228 struct request *next = NULL, *prev = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229
Jens Axboe21183b02006-07-13 12:33:14 +02001230 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
1232 if (rbprev)
Jens Axboe5e705372006-07-13 12:39:25 +02001233 prev = rb_entry_rq(rbprev);
Jens Axboe21183b02006-07-13 12:33:14 +02001234
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 if (rbnext)
Jens Axboe5e705372006-07-13 12:39:25 +02001236 next = rb_entry_rq(rbnext);
Jens Axboe21183b02006-07-13 12:33:14 +02001237 else {
1238 rbnext = rb_first(&cfqq->sort_list);
1239 if (rbnext && rbnext != &last->rb_node)
Jens Axboe5e705372006-07-13 12:39:25 +02001240 next = rb_entry_rq(rbnext);
Jens Axboe21183b02006-07-13 12:33:14 +02001241 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242
Corrado Zoccolocf7c25c2009-11-08 17:16:46 +01001243 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244}
1245
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06001246static u64 cfq_slice_offset(struct cfq_data *cfqd,
1247 struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248{
Jens Axboed9e76202007-04-20 14:27:50 +02001249 /*
1250 * just an approximation, should be ok.
1251 */
Vivek Goyalcdb16e82009-12-03 12:59:38 -05001252 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
Jens Axboe464191c2009-11-30 09:38:13 +01001253 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
Jens Axboed9e76202007-04-20 14:27:50 +02001254}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05001256static inline s64
1257cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
1258{
1259 return cfqg->vdisktime - st->min_vdisktime;
1260}
1261
/*
 * Insert @cfqg into the group service tree ordered by cfqg_key(),
 * updating the cached leftmost pointer when the new node becomes the
 * smallest.  Ties (equal keys) go to the right, preserving insertion
 * order among equals.
 */
static void
__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
{
	struct rb_node **node = &st->rb.rb_node;
	struct rb_node *parent = NULL;
	struct cfq_group *__cfqg;
	s64 key = cfqg_key(st, cfqg);
	int left = 1;

	while (*node != NULL) {
		parent = *node;
		__cfqg = rb_entry_cfqg(parent);

		if (key < cfqg_key(st, __cfqg))
			node = &parent->rb_left;
		else {
			node = &parent->rb_right;
			left = 0;
		}
	}

	if (left)
		st->left = &cfqg->rb_node;

	rb_link_node(&cfqg->rb_node, parent, node);
	rb_insert_color(&cfqg->rb_node, &st->rb);
}
1289
/*
 * This has to be called only on activation of cfqg
 */
static void
cfq_update_group_weight(struct cfq_group *cfqg)
{
	/* apply a weight change queued up via the cgroup interface */
	if (cfqg->new_weight) {
		cfqg->weight = cfqg->new_weight;
		cfqg->new_weight = 0;
	}
}
1301
/* Apply a pending leaf_weight change; only legal while @cfqg is off the tree. */
static void
cfq_update_group_leaf_weight(struct cfq_group *cfqg)
{
	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));

	if (cfqg->new_leaf_weight) {
		cfqg->leaf_weight = cfqg->new_leaf_weight;
		cfqg->new_leaf_weight = 0;
	}
}
1312
/*
 * Activate @cfqg on service tree @st: apply pending leaf_weight, insert
 * it into the rbtree and compute its vfraction by compounding weight
 * ratios up the ancestor chain.
 */
static void
cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
{
	unsigned int vfr = 1 << CFQ_SERVICE_SHIFT;	/* start with 1 */
	struct cfq_group *pos = cfqg;
	struct cfq_group *parent;
	bool propagate;

	/* add to the service tree */
	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));

	/*
	 * Update leaf_weight.  We cannot update weight at this point
	 * because cfqg might already have been activated and is
	 * contributing its current weight to the parent's child_weight.
	 */
	cfq_update_group_leaf_weight(cfqg);
	__cfq_group_service_tree_add(st, cfqg);

	/*
	 * Activate @cfqg and calculate the portion of vfraction @cfqg is
	 * entitled to.  vfraction is calculated by walking the tree
	 * towards the root calculating the fraction it has at each level.
	 * The compounded ratio is how much vfraction @cfqg owns.
	 *
	 * Start with the proportion tasks in this cfqg has against active
	 * children cfqgs - its leaf_weight against children_weight.
	 */
	propagate = !pos->nr_active++;
	pos->children_weight += pos->leaf_weight;
	vfr = vfr * pos->leaf_weight / pos->children_weight;

	/*
	 * Compound ->weight walking up the tree.  Both activation and
	 * vfraction calculation are done in the same loop.  Propagation
	 * stops once an already activated node is met.  vfraction
	 * calculation should always continue to the root.
	 */
	while ((parent = cfqg_parent(pos))) {
		if (propagate) {
			cfq_update_group_weight(pos);
			propagate = !parent->nr_active++;
			parent->children_weight += pos->weight;
		}
		vfr = vfr * pos->weight / parent->children_weight;
		pos = parent;
	}

	/* never let vfraction drop to 0 */
	cfqg->vfraction = max_t(unsigned, vfr, 1);
}
1363
/*
 * A cfq_queue joined @cfqg: bump the queue count and, if this is the
 * first queue, put the group on the service tree.
 */
static void
cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
	struct cfq_rb_root *st = &cfqd->grp_service_tree;
	struct cfq_group *__cfqg;
	struct rb_node *n;

	cfqg->nr_cfqq++;
	/* already on the service tree, nothing more to do */
	if (!RB_EMPTY_NODE(&cfqg->rb_node))
		return;

	/*
	 * Currently put the group at the end. Later implement something
	 * so that groups get lesser vtime based on their weights, so that
	 * if group does not lose all if it was not continuously backlogged.
	 */
	n = rb_last(&st->rb);
	if (n) {
		__cfqg = rb_entry_cfqg(n);
		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
	} else
		cfqg->vdisktime = st->min_vdisktime;
	cfq_group_service_tree_add(st, cfqg);
}
1388
/*
 * Reverse of cfq_group_service_tree_add(): deactivate @cfqg, undo the
 * weight contributions up the ancestor chain, and unlink it from @st.
 */
static void
cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
{
	struct cfq_group *pos = cfqg;
	bool propagate;

	/*
	 * Undo activation from cfq_group_service_tree_add().  Deactivate
	 * @cfqg and propagate deactivation upwards.
	 */
	propagate = !--pos->nr_active;
	pos->children_weight -= pos->leaf_weight;

	while (propagate) {
		struct cfq_group *parent = cfqg_parent(pos);

		/* @pos has 0 nr_active at this point */
		WARN_ON_ONCE(pos->children_weight);
		pos->vfraction = 0;

		if (!parent)
			break;

		propagate = !--parent->nr_active;
		parent->children_weight -= pos->weight;
		pos = parent;
	}

	/* remove from the service tree */
	if (!RB_EMPTY_NODE(&cfqg->rb_node))
		cfq_rb_erase(&cfqg->rb_node, st);
}
1421
/*
 * A cfq_queue left @cfqg: drop the queue count and take the group off
 * the service tree once no queues remain.
 */
static void
cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
	struct cfq_rb_root *st = &cfqd->grp_service_tree;

	BUG_ON(cfqg->nr_cfqq < 1);
	cfqg->nr_cfqq--;

	/* If there are other cfq queues under this group, don't delete it */
	if (cfqg->nr_cfqq)
		return;

	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
	cfq_group_service_tree_del(st, cfqg);
	/* discard any partially consumed workload slice */
	cfqg->saved_wl_slice = 0;
	cfqg_stats_update_dequeue(cfqg);
}
1439
/*
 * Compute how much slice time @cfqq actually used, in ns.  Time that
 * elapsed beyond the allocated slice (or before the slice started) is
 * reported separately through @unaccounted_time.
 */
static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
				       u64 *unaccounted_time)
{
	u64 slice_used;
	u64 now = ktime_get_ns();

	/*
	 * Queue got expired before even a single request completed or
	 * got expired immediately after first request completion.
	 */
	if (!cfqq->slice_start || cfqq->slice_start == now) {
		/*
		 * Also charge the seek time incurred to the group, otherwise
		 * if there are multiple queues in the group, each can dispatch
		 * a single request on seeky media and cause lots of seek time
		 * and group will never know it.
		 */
		slice_used = max_t(u64, (now - cfqq->dispatch_start),
					jiffies_to_nsecs(1));
	} else {
		slice_used = now - cfqq->slice_start;
		if (slice_used > cfqq->allocated_slice) {
			/* cap at the allocation; report the overrun */
			*unaccounted_time = slice_used - cfqq->allocated_slice;
			slice_used = cfqq->allocated_slice;
		}
		if (cfqq->slice_start > cfqq->dispatch_start)
			*unaccounted_time += cfqq->slice_start -
					cfqq->dispatch_start;
	}

	return slice_used;
}
1472
/*
 * Charge the slice @cfqq just used to its group @cfqg: advance the
 * group's vdisktime on the service tree (scaled by its vfraction) and
 * save the serving-workload context if the workload slice isn't over.
 */
static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
				struct cfq_queue *cfqq)
{
	struct cfq_rb_root *st = &cfqd->grp_service_tree;
	u64 used_sl, charge, unaccounted_sl = 0;
	/* number of busy sync queues in this group (idle tree excluded) */
	int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
			- cfqg->service_tree_idle.count;
	unsigned int vfr;
	u64 now = ktime_get_ns();

	BUG_ON(nr_sync < 0);
	used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);

	/* in iops mode charge dispatched requests; async-only groups are
	 * charged their full allocated slice */
	if (iops_mode(cfqd))
		charge = cfqq->slice_dispatch;
	else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
		charge = cfqq->allocated_slice;

	/*
	 * Can't update vdisktime while on service tree and cfqg->vfraction
	 * is valid only while on it.  Cache vfr, leave the service tree,
	 * update vdisktime and go back on.  The re-addition to the tree
	 * will also update the weights as necessary.
	 */
	vfr = cfqg->vfraction;
	cfq_group_service_tree_del(st, cfqg);
	cfqg->vdisktime += cfqg_scale_charge(charge, vfr);
	cfq_group_service_tree_add(st, cfqg);

	/* This group is being expired. Save the context */
	if (cfqd->workload_expires > now) {
		cfqg->saved_wl_slice = cfqd->workload_expires - now;
		cfqg->saved_wl_type = cfqd->serving_wl_type;
		cfqg->saved_wl_class = cfqd->serving_wl_class;
	} else
		cfqg->saved_wl_slice = 0;

	cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
			st->min_vdisktime);
	cfq_log_cfqq(cfqq->cfqd, cfqq,
		     "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
		     used_sl, cfqq->slice_dispatch, charge,
		     iops_mode(cfqd), cfqq->nr_sectors);
	cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
	cfqg_stats_set_start_empty_time(cfqg);
}
1519
/**
 * cfq_init_cfqg_base - initialize base part of a cfq_group
 * @cfqg: cfq_group to initialize
 *
 * Initialize the base part which is used whether %CONFIG_CFQ_GROUP_IOSCHED
 * is enabled or not.
 */
static void cfq_init_cfqg_base(struct cfq_group *cfqg)
{
	struct cfq_rb_root *st;
	int i, j;

	/* start every service tree empty */
	for_each_cfqg_st(cfqg, i, j, st)
		*st = CFQ_RB_ROOT;
	RB_CLEAR_NODE(&cfqg->rb_node);

	/* initialize think-time tracking to the current time */
	cfqg->ttime.last_end_request = ktime_get_ns();
}
1538
Vivek Goyal25fb5162009-12-03 12:59:46 -05001539#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo69d7fde2015-08-18 14:55:36 -07001540static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
1541 bool on_dfl, bool reset_dev, bool is_leaf_weight);
1542
/* Tear down all counters in @stats; reverse of cfqg_stats_init(). */
static void cfqg_stats_exit(struct cfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	blkg_stat_exit(&stats->time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	blkg_stat_exit(&stats->unaccounted_time);
	blkg_stat_exit(&stats->avg_queue_size_sum);
	blkg_stat_exit(&stats->avg_queue_size_samples);
	blkg_stat_exit(&stats->dequeue);
	blkg_stat_exit(&stats->group_wait_time);
	blkg_stat_exit(&stats->idle_time);
	blkg_stat_exit(&stats->empty_time);
#endif
}
1560
/*
 * Allocate all counters in @stats.  Returns 0 on success or -ENOMEM; on
 * failure, whatever was already allocated is released via cfqg_stats_exit().
 */
static int cfqg_stats_init(struct cfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    blkg_stat_init(&stats->time, gfp))
		goto err;

#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (blkg_stat_init(&stats->unaccounted_time, gfp) ||
	    blkg_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    blkg_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    blkg_stat_init(&stats->dequeue, gfp) ||
	    blkg_stat_init(&stats->group_wait_time, gfp) ||
	    blkg_stat_init(&stats->idle_time, gfp) ||
	    blkg_stat_init(&stats->empty_time, gfp))
		goto err;
#endif
	return 0;
err:
	cfqg_stats_exit(stats);
	return -ENOMEM;
}
1585
Tejun Heoe4a9bde2015-08-18 14:55:16 -07001586static struct blkcg_policy_data *cfq_cpd_alloc(gfp_t gfp)
1587{
1588 struct cfq_group_data *cgd;
1589
Tejun Heoebc4ff62016-11-10 11:16:37 -05001590 cgd = kzalloc(sizeof(*cgd), gfp);
Tejun Heoe4a9bde2015-08-18 14:55:16 -07001591 if (!cgd)
1592 return NULL;
1593 return &cgd->cpd;
1594}
1595
/*
 * Initialize per-blkcg cfq data with the hierarchy's default weights.
 * The root blkcg gets twice the default.
 */
static void cfq_cpd_init(struct blkcg_policy_data *cpd)
{
	struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);
	/* cgroup2 ("dfl") and cgroup1 use different default weights */
	unsigned int weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
			      CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;

	if (cpd_to_blkcg(cpd) == &blkcg_root)
		weight *= 2;

	cgd->weight = weight;
	cgd->leaf_weight = weight;
}
1608
/* Free policy data allocated by cfq_cpd_alloc(). */
static void cfq_cpd_free(struct blkcg_policy_data *cpd)
{
	struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);

	kfree(cgd);
}
1613
/*
 * cpd_bind hook: reset the blkcg's weight and leaf_weight (including
 * per-device overrides) to the hierarchy default.
 */
static void cfq_cpd_bind(struct blkcg_policy_data *cpd)
{
	struct blkcg *blkcg = cpd_to_blkcg(cpd);
	bool on_dfl = cgroup_subsys_on_dfl(io_cgrp_subsys);
	unsigned int weight = on_dfl ? CGROUP_WEIGHT_DFL : CFQ_WEIGHT_LEGACY_DFL;

	if (blkcg == &blkcg_root)
		weight *= 2;

	/* reset both weight variants; neither call is expected to fail */
	WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, false));
	WARN_ON_ONCE(__cfq_set_weight(&blkcg->css, weight, on_dfl, true, true));
}
1626
Tejun Heo001bea72015-08-18 14:55:11 -07001627static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node)
1628{
Tejun Heob2ce2642015-08-18 14:55:13 -07001629 struct cfq_group *cfqg;
1630
1631 cfqg = kzalloc_node(sizeof(*cfqg), gfp, node);
1632 if (!cfqg)
1633 return NULL;
1634
1635 cfq_init_cfqg_base(cfqg);
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001636 if (cfqg_stats_init(&cfqg->stats, gfp)) {
1637 kfree(cfqg);
1638 return NULL;
1639 }
Tejun Heob2ce2642015-08-18 14:55:13 -07001640
1641 return &cfqg->pd;
Tejun Heo001bea72015-08-18 14:55:11 -07001642}
1643
/* Initialize a new cfq_group from its blkcg's configured weights. */
static void cfq_pd_init(struct blkg_policy_data *pd)
{
	struct cfq_group *cfqg = pd_to_cfqg(pd);
	struct cfq_group_data *cgd = blkcg_to_cfqgd(pd->blkg->blkcg);

	cfqg->weight = cgd->weight;
	cfqg->leaf_weight = cgd->leaf_weight;
}
1652
/*
 * Group is going offline: release the cached async queues and hand the
 * group's statistics to its parent.
 */
static void cfq_pd_offline(struct blkg_policy_data *pd)
{
	struct cfq_group *cfqg = pd_to_cfqg(pd);
	int i;

	/* drop references on the cached per-priority async queues */
	for (i = 0; i < IOPRIO_BE_NR; i++) {
		if (cfqg->async_cfqq[0][i])
			cfq_put_queue(cfqg->async_cfqq[0][i]);
		if (cfqg->async_cfqq[1][i])
			cfq_put_queue(cfqg->async_cfqq[1][i]);
	}

	if (cfqg->async_idle_cfqq)
		cfq_put_queue(cfqg->async_idle_cfqq);

	/*
	 * @blkg is going offline and will be ignored by
	 * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
	 * that they don't get lost.  If IOs complete after this point, the
	 * stats for them will be lost.  Oh well...
	 */
	cfqg_stats_xfer_dead(cfqg);
}
1676
Tejun Heo001bea72015-08-18 14:55:11 -07001677static void cfq_pd_free(struct blkg_policy_data *pd)
1678{
Tejun Heo24bdb8e2015-08-18 14:55:22 -07001679 struct cfq_group *cfqg = pd_to_cfqg(pd);
1680
1681 cfqg_stats_exit(&cfqg->stats);
1682 return kfree(cfqg);
Tejun Heo001bea72015-08-18 14:55:11 -07001683}
1684
Tejun Heoa9520cd2015-08-18 14:55:14 -07001685static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
Tejun Heo689665a2013-01-09 08:05:13 -08001686{
Tejun Heoa9520cd2015-08-18 14:55:14 -07001687 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo689665a2013-01-09 08:05:13 -08001688
1689 cfqg_stats_reset(&cfqg->stats);
Vivek Goyal25fb5162009-12-03 12:59:46 -05001690}
1691
Tejun Heoae118892015-08-18 14:55:20 -07001692static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
1693 struct blkcg *blkcg)
Vivek Goyal25fb5162009-12-03 12:59:46 -05001694{
Tejun Heoae118892015-08-18 14:55:20 -07001695 struct blkcg_gq *blkg;
Vivek Goyal25fb5162009-12-03 12:59:46 -05001696
Tejun Heoae118892015-08-18 14:55:20 -07001697 blkg = blkg_lookup(blkcg, cfqd->queue);
1698 if (likely(blkg))
1699 return blkg_to_cfqg(blkg);
1700 return NULL;
Vivek Goyal25fb5162009-12-03 12:59:46 -05001701}
1702
/* Associate @cfqq with @cfqg; the queue takes a reference on the group. */
static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
{
	cfqq->cfqg = cfqg;
	/* cfqq reference on cfqg */
	cfqg_get(cfqg);
}
1709
Tejun Heof95a04a2012-04-16 13:57:26 -07001710static u64 cfqg_prfill_weight_device(struct seq_file *sf,
1711 struct blkg_policy_data *pd, int off)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001712{
Tejun Heof95a04a2012-04-16 13:57:26 -07001713 struct cfq_group *cfqg = pd_to_cfqg(pd);
Tejun Heo3381cb82012-04-01 14:38:44 -07001714
1715 if (!cfqg->dev_weight)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001716 return 0;
Tejun Heof95a04a2012-04-16 13:57:26 -07001717 return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001718}
1719
/* Read handler: list per-device weight overrides for the blkcg. */
static int cfqg_print_weight_device(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_weight_device, &blkcg_policy_cfq,
			  0, false);
	return 0;
}
1727
Tejun Heoe71357e2013-01-09 08:05:10 -08001728static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
1729 struct blkg_policy_data *pd, int off)
1730{
1731 struct cfq_group *cfqg = pd_to_cfqg(pd);
1732
1733 if (!cfqg->dev_leaf_weight)
1734 return 0;
1735 return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
1736}
1737
/* Read handler: list per-device leaf_weight overrides for the blkcg. */
static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
			  0, false);
	return 0;
}
1745
Tejun Heo2da8ca82013-12-05 12:28:04 -05001746static int cfq_print_weight(struct seq_file *sf, void *v)
Tejun Heo60c2bc22012-04-01 14:38:43 -07001747{
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001748 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
Jens Axboe9470e4a2015-06-19 10:19:36 -06001749 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1750 unsigned int val = 0;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001751
Jens Axboe9470e4a2015-06-19 10:19:36 -06001752 if (cgd)
1753 val = cgd->weight;
1754
1755 seq_printf(sf, "%u\n", val);
Tejun Heo60c2bc22012-04-01 14:38:43 -07001756 return 0;
1757}
1758
Tejun Heo2da8ca82013-12-05 12:28:04 -05001759static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
Tejun Heoe71357e2013-01-09 08:05:10 -08001760{
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001761 struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
Jens Axboe9470e4a2015-06-19 10:19:36 -06001762 struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);
1763 unsigned int val = 0;
Arianna Avanzinie48453c2015-06-05 23:38:42 +02001764
Jens Axboe9470e4a2015-06-19 10:19:36 -06001765 if (cgd)
1766 val = cgd->leaf_weight;
1767
1768 seq_printf(sf, "%u\n", val);
Tejun Heoe71357e2013-01-09 08:05:10 -08001769 return 0;
1770}
1771
/*
 * Apply a per-device (leaf_)weight written to a cgroup file.  After the
 * device part is consumed by blkg_conf_prep(), the body is either a
 * numeric weight or the word "default" (which clears the override).
 * @on_dfl selects cgroup2 limits; @is_leaf_weight picks which weight.
 */
static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off,
					bool on_dfl, bool is_leaf_weight)
{
	unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
	unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkg_conf_ctx ctx;
	struct cfq_group *cfqg;
	struct cfq_group_data *cfqgd;
	int ret;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_cfq, buf, &ctx);
	if (ret)
		return ret;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* require "default" on dfl */
		ret = -ERANGE;
		if (!v && on_dfl)
			goto out_finish;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;	/* v == 0 means: clear the per-device override */
	} else {
		ret = -EINVAL;
		goto out_finish;
	}

	cfqg = blkg_to_cfqg(ctx.blkg);
	cfqgd = blkcg_to_cfqgd(blkcg);

	ret = -ERANGE;
	if (!v || (v >= min && v <= max)) {
		if (!is_leaf_weight) {
			cfqg->dev_weight = v;
			/* fall back to the blkcg default when v == 0 */
			cfqg->new_weight = v ?: cfqgd->weight;
		} else {
			cfqg->dev_leaf_weight = v;
			cfqg->new_leaf_weight = v ?: cfqgd->leaf_weight;
		}
		ret = 0;
	}
out_finish:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}
1819
/* cgroup1 "weight_device" write handler. */
static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off)
{
	return __cfqg_set_weight_device(of, buf, nbytes, off, false, false);
}
1825
/* cgroup1 "leaf_weight_device" write handler. */
static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return __cfqg_set_weight_device(of, buf, nbytes, off, false, true);
}
1831
/*
 * Set the blkcg-wide default weight or leaf_weight and propagate it to
 * every group of the blkcg that has no per-device override.  @reset_dev
 * additionally clears the per-device overrides first.
 */
static int __cfq_set_weight(struct cgroup_subsys_state *css, u64 val,
			    bool on_dfl, bool reset_dev, bool is_leaf_weight)
{
	unsigned int min = on_dfl ? CGROUP_WEIGHT_MIN : CFQ_WEIGHT_LEGACY_MIN;
	unsigned int max = on_dfl ? CGROUP_WEIGHT_MAX : CFQ_WEIGHT_LEGACY_MAX;
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	struct cfq_group_data *cfqgd;
	int ret = 0;

	if (val < min || val > max)
		return -ERANGE;

	spin_lock_irq(&blkcg->lock);
	cfqgd = blkcg_to_cfqgd(blkcg);
	if (!cfqgd) {
		ret = -EINVAL;
		goto out;
	}

	if (!is_leaf_weight)
		cfqgd->weight = val;
	else
		cfqgd->leaf_weight = val;

	/* push the new default down to groups without a device override */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct cfq_group *cfqg = blkg_to_cfqg(blkg);

		if (!cfqg)
			continue;

		if (!is_leaf_weight) {
			if (reset_dev)
				cfqg->dev_weight = 0;
			if (!cfqg->dev_weight)
				cfqg->new_weight = cfqgd->weight;
		} else {
			if (reset_dev)
				cfqg->dev_leaf_weight = 0;
			if (!cfqg->dev_leaf_weight)
				cfqg->new_leaf_weight = cfqgd->leaf_weight;
		}
	}

out:
	spin_unlock_irq(&blkcg->lock);
	return ret;
}
1880
/* cgroup1 "weight" write handler. */
static int cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
			  u64 val)
{
	return __cfq_set_weight(css, val, false, false, false);
}
1886
/* cgroup1 "leaf_weight" write handler. */
static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
			       struct cftype *cft, u64 val)
{
	return __cfq_set_weight(css, val, false, false, true);
}
1892
/* Print the blkg_stat selected by the cftype's private offset, per blkg. */
static int cfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_cfq, seq_cft(sf)->private, false);
	return 0;
}
1899
/* Print the blkg_rwstat selected by the cftype's private offset, per blkg. */
static int cfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_cfq, seq_cft(sf)->private, true);
	return 0;
}
1906
/* Print a blkg_stat summed over @pd's blkg and its descendants. */
static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd),
					  &blkcg_policy_cfq, off);
	return __blkg_prfill_u64(sf, pd, sum);
}
1914
/* Print a blkg_rwstat summed over @pd's blkg and its descendants. */
static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd),
							   &blkcg_policy_cfq, off);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}
1922
/* Recursive variant of cfqg_print_stat(). */
static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
			  seq_cft(sf)->private, false);
	return 0;
}
1930
/* Recursive variant of cfqg_print_rwstat(). */
static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
			  seq_cft(sf)->private, true);
	return 0;
}
1938
Tejun Heo702747c2015-08-18 14:55:25 -07001939static u64 cfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
1940 int off)
1941{
1942 u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
1943
1944 return __blkg_prfill_u64(sf, pd, sum >> 9);
1945}
1946
1947static int cfqg_print_stat_sectors(struct seq_file *sf, void *v)
1948{
1949 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1950 cfqg_prfill_sectors, &blkcg_policy_cfq, 0, false);
1951 return 0;
1952}
1953
1954static u64 cfqg_prfill_sectors_recursive(struct seq_file *sf,
1955 struct blkg_policy_data *pd, int off)
1956{
1957 struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL,
1958 offsetof(struct blkcg_gq, stat_bytes));
1959 u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
1960 atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
1961
1962 return __blkg_prfill_u64(sf, pd, sum >> 9);
1963}
1964
1965static int cfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
1966{
1967 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1968 cfqg_prfill_sectors_recursive, &blkcg_policy_cfq, 0,
1969 false);
1970 return 0;
1971}
1972
#ifdef CONFIG_DEBUG_BLK_CGROUP
/*
 * prfill callback for "avg_queue_size": print the mean queue depth of the
 * group, i.e. avg_queue_size_sum / avg_queue_size_samples.  Prints 0 when
 * no samples were taken yet, avoiding a division by zero.
 */
static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct cfq_group *cfqg = pd_to_cfqg(pd);
	u64 samples = blkg_stat_read(&cfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = blkg_stat_read(&cfqg->stats.avg_queue_size_sum);
		/* 64-bit division helper for 32-bit architectures */
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

/* print avg_queue_size */
static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
			  0, false);
	return 0;
}
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
1998
/*
 * cgroup-v1 ("legacy" hierarchy) interface files for the cfq blkcg policy.
 * Weight knobs come first, followed by per-group statistics, their
 * recursive (subtree-wide) variants, and debug-only counters.
 */
static struct cftype cfq_blkcg_legacy_files[] = {
	/* on root, weight is mapped to leaf_weight */
	{
		.name = "weight_device",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cfqg_print_leaf_weight_device,
		.write = cfqg_set_leaf_weight_device,
	},
	{
		.name = "weight",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cfq_print_leaf_weight,
		.write_u64 = cfq_set_leaf_weight,
	},

	/* no such mapping necessary for !roots */
	{
		.name = "weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = cfqg_print_weight_device,
		.write = cfqg_set_weight_device,
	},
	{
		.name = "weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = cfq_print_weight,
		.write_u64 = cfq_set_weight,
	},

	{
		.name = "leaf_weight_device",
		.seq_show = cfqg_print_leaf_weight_device,
		.write = cfqg_set_leaf_weight_device,
	},
	{
		.name = "leaf_weight",
		.seq_show = cfq_print_leaf_weight,
		.write_u64 = cfq_set_leaf_weight,
	},

	/* statistics, covers only the tasks in the cfqg */
	{
		.name = "time",
		.private = offsetof(struct cfq_group, stats.time),
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "sectors",
		.seq_show = cfqg_print_stat_sectors,
	},
	{
		/* bytes/ios stats live in the generic blkg, hence the
		 * policy pointer (not a stats offset) in ->private */
		.name = "io_service_bytes",
		.private = (unsigned long)&blkcg_policy_cfq,
		.seq_show = blkg_print_stat_bytes,
	},
	{
		.name = "io_serviced",
		.private = (unsigned long)&blkcg_policy_cfq,
		.seq_show = blkg_print_stat_ios,
	},
	{
		.name = "io_service_time",
		.private = offsetof(struct cfq_group, stats.service_time),
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_wait_time",
		.private = offsetof(struct cfq_group, stats.wait_time),
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_merged",
		.private = offsetof(struct cfq_group, stats.merged),
		.seq_show = cfqg_print_rwstat,
	},
	{
		.name = "io_queued",
		.private = offsetof(struct cfq_group, stats.queued),
		.seq_show = cfqg_print_rwstat,
	},

	/* the same statistics which cover the cfqg and its descendants */
	{
		.name = "time_recursive",
		.private = offsetof(struct cfq_group, stats.time),
		.seq_show = cfqg_print_stat_recursive,
	},
	{
		.name = "sectors_recursive",
		.seq_show = cfqg_print_stat_sectors_recursive,
	},
	{
		.name = "io_service_bytes_recursive",
		.private = (unsigned long)&blkcg_policy_cfq,
		.seq_show = blkg_print_stat_bytes_recursive,
	},
	{
		.name = "io_serviced_recursive",
		.private = (unsigned long)&blkcg_policy_cfq,
		.seq_show = blkg_print_stat_ios_recursive,
	},
	{
		.name = "io_service_time_recursive",
		.private = offsetof(struct cfq_group, stats.service_time),
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_wait_time_recursive",
		.private = offsetof(struct cfq_group, stats.wait_time),
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_merged_recursive",
		.private = offsetof(struct cfq_group, stats.merged),
		.seq_show = cfqg_print_rwstat_recursive,
	},
	{
		.name = "io_queued_recursive",
		.private = offsetof(struct cfq_group, stats.queued),
		.seq_show = cfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.seq_show = cfqg_print_avg_queue_size,
	},
	{
		.name = "group_wait_time",
		.private = offsetof(struct cfq_group, stats.group_wait_time),
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "idle_time",
		.private = offsetof(struct cfq_group, stats.idle_time),
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "empty_time",
		.private = offsetof(struct cfq_group, stats.empty_time),
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "dequeue",
		.private = offsetof(struct cfq_group, stats.dequeue),
		.seq_show = cfqg_print_stat,
	},
	{
		.name = "unaccounted_time",
		.private = offsetof(struct cfq_group, stats.unaccounted_time),
		.seq_show = cfqg_print_stat,
	},
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
	{ }	/* terminate */
};
Tejun Heo2ee867dc2015-08-18 14:55:34 -07002153
/*
 * seq_show handler for "weight" on the default (cgroup-v2) hierarchy:
 * prints the blkcg-wide default weight followed by any per-device
 * overrides.
 */
static int cfq_print_weight_on_dfl(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct cfq_group_data *cgd = blkcg_to_cfqgd(blkcg);

	/* NOTE(review): cgd is dereferenced without a NULL check —
	 * presumably the policy data always exists by the time this file
	 * is readable; confirm against blkcg policy activation. */
	seq_printf(sf, "default %u\n", cgd->weight);
	blkcg_print_blkgs(sf, blkcg, cfqg_prfill_weight_device,
			  &blkcg_policy_cfq, 0, false);
	return 0;
}
2164
/*
 * Write handler for "weight" on the default (cgroup-v2) hierarchy.
 * Accepts either "WEIGHT" / "default WEIGHT" to set the blkcg-wide
 * default, or "MAJ:MIN WEIGHT" to set a per-device weight.
 * Returns @nbytes on success, a negative errno otherwise.
 */
static ssize_t cfq_set_weight_on_dfl(struct kernfs_open_file *of,
				     char *buf, size_t nbytes, loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	/* *endp == '\0' means the whole string was a bare number */
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = __cfq_set_weight(of_css(of), v, true, false, false);
		return ret ?: nbytes;
	}

	/* "MAJ:MIN WEIGHT" */
	return __cfqg_set_weight_device(of, buf, nbytes, off, true, false);
}
2184
/* cgroup-v2 ("default hierarchy") interface files for the cfq policy */
static struct cftype cfq_blkcg_files[] = {
	{
		.name = "weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = cfq_print_weight_on_dfl,
		.write = cfq_set_weight_on_dfl,
	},
	{ }	/* terminate */
};
2194
#else /* GROUP_IOSCHED */
/*
 * Without group scheduling there is exactly one group; every lookup
 * resolves to the root group regardless of @blkcg.
 */
static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
					 struct blkcg *blkcg)
{
	return cfqd->root_group;
}

/* associate @cfqq with @cfqg (no refcounting in the !GROUP_IOSCHED case) */
static inline void
cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
	cfqq->cfqg = cfqg;
}

#endif /* GROUP_IOSCHED */
2208
/*
 * The cfqd->service_trees holds all pending cfq_queue's that have
 * requests waiting to be processed. It is sorted in the order that
 * we will service the queues.
 *
 * cfq_service_tree_add() (re)positions @cfqq in its group's service tree
 * keyed by rb_key (nanoseconds).  @add_front places the queue ahead of
 * the current first entry instead of by slice offset.
 */
static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
				 bool add_front)
{
	struct rb_node **p, *parent;
	struct cfq_queue *__cfqq;
	u64 rb_key;
	struct cfq_rb_root *st;
	int left;
	int new_cfqq = 1;	/* cleared if cfqq was already on a tree */
	u64 now = ktime_get_ns();

	st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
	if (cfq_class_idle(cfqq)) {
		/* idle class always sorts behind the last entry */
		rb_key = CFQ_IDLE_DELAY;
		parent = rb_last(&st->rb);
		if (parent && parent != &cfqq->rb_node) {
			__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
			rb_key += __cfqq->rb_key;
		} else
			rb_key += now;
	} else if (!add_front) {
		/*
		 * Get our rb key offset. Subtract any residual slice
		 * value carried from last service. A negative resid
		 * count indicates slice overrun, and this should position
		 * the next service time further away in the tree.
		 */
		rb_key = cfq_slice_offset(cfqd, cfqq) + now;
		rb_key -= cfqq->slice_resid;
		cfqq->slice_resid = 0;
	} else {
		/* front-add: key just below the current leftmost entry */
		rb_key = -NSEC_PER_SEC;
		__cfqq = cfq_rb_first(st);
		rb_key += __cfqq ? __cfqq->rb_key : now;
	}

	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
		new_cfqq = 0;
		/*
		 * same position, nothing more to do
		 */
		if (rb_key == cfqq->rb_key && cfqq->service_tree == st)
			return;

		/* remove from the old tree before re-inserting */
		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
		cfqq->service_tree = NULL;
	}

	left = 1;	/* stays set only if we never took a right branch */
	parent = NULL;
	cfqq->service_tree = st;
	p = &st->rb.rb_node;
	while (*p) {
		parent = *p;
		__cfqq = rb_entry(parent, struct cfq_queue, rb_node);

		/*
		 * sort by key, that represents service time.
		 */
		if (rb_key < __cfqq->rb_key)
			p = &parent->rb_left;
		else {
			p = &parent->rb_right;
			left = 0;
		}
	}

	/* cache the leftmost (next-to-serve) node */
	if (left)
		st->left = &cfqq->rb_node;

	cfqq->rb_key = rb_key;
	rb_link_node(&cfqq->rb_node, parent, p);
	rb_insert_color(&cfqq->rb_node, &st->rb);
	st->count++;
	/* only notify the group for a genuinely new, normally-added queue */
	if (add_front || !new_cfqq)
		return;
	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
}
2292
Jens Axboea36e71f2009-04-15 12:15:11 +02002293static struct cfq_queue *
Jens Axboef2d1f0a2009-04-23 12:19:38 +02002294cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
2295 sector_t sector, struct rb_node **ret_parent,
2296 struct rb_node ***rb_link)
Jens Axboea36e71f2009-04-15 12:15:11 +02002297{
Jens Axboea36e71f2009-04-15 12:15:11 +02002298 struct rb_node **p, *parent;
2299 struct cfq_queue *cfqq = NULL;
2300
2301 parent = NULL;
2302 p = &root->rb_node;
2303 while (*p) {
2304 struct rb_node **n;
2305
2306 parent = *p;
2307 cfqq = rb_entry(parent, struct cfq_queue, p_node);
2308
2309 /*
2310 * Sort strictly based on sector. Smallest to the left,
2311 * largest to the right.
2312 */
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002313 if (sector > blk_rq_pos(cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002314 n = &(*p)->rb_right;
Tejun Heo2e46e8b2009-05-07 22:24:41 +09002315 else if (sector < blk_rq_pos(cfqq->next_rq))
Jens Axboea36e71f2009-04-15 12:15:11 +02002316 n = &(*p)->rb_left;
2317 else
2318 break;
2319 p = n;
Jens Axboe3ac6c9f2009-04-23 12:14:56 +02002320 cfqq = NULL;
Jens Axboea36e71f2009-04-15 12:15:11 +02002321 }
2322
2323 *ret_parent = parent;
2324 if (rb_link)
2325 *rb_link = p;
Jens Axboe3ac6c9f2009-04-23 12:14:56 +02002326 return cfqq;
Jens Axboea36e71f2009-04-15 12:15:11 +02002327}
2328
/*
 * (Re)insert @cfqq into the prio tree for its original ioprio, keyed by
 * the sector of the queue's next request.  Idle-class queues and queues
 * with no pending request are kept out of the tree.
 */
static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	struct rb_node **p, *parent;
	struct cfq_queue *__cfqq;

	/* drop any stale position first */
	if (cfqq->p_root) {
		rb_erase(&cfqq->p_node, cfqq->p_root);
		cfqq->p_root = NULL;
	}

	if (cfq_class_idle(cfqq))
		return;
	if (!cfqq->next_rq)
		return;

	cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
				      blk_rq_pos(cfqq->next_rq), &parent, &p);
	if (!__cfqq) {
		rb_link_node(&cfqq->p_node, parent, p);
		rb_insert_color(&cfqq->p_node, cfqq->p_root);
	} else
		/* another queue already owns this sector; stay unlinked */
		cfqq->p_root = NULL;
}
2353
Jens Axboe498d3aa22007-04-26 12:54:48 +02002354/*
2355 * Update cfqq's position in the service tree.
2356 */
Jens Axboeedd75ff2007-04-19 12:03:34 +02002357static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
Jens Axboe6d048f52007-04-25 12:44:27 +02002358{
Jens Axboe6d048f52007-04-25 12:44:27 +02002359 /*
2360 * Resorting requires the cfqq to be on the RR list already.
2361 */
Jens Axboea36e71f2009-04-15 12:15:11 +02002362 if (cfq_cfqq_on_rr(cfqq)) {
Jens Axboeedd75ff2007-04-19 12:03:34 +02002363 cfq_service_tree_add(cfqd, cfqq, 0);
Jens Axboea36e71f2009-04-15 12:15:11 +02002364 cfq_prio_tree_add(cfqd, cfqq);
2365 }
Jens Axboe6d048f52007-04-25 12:44:27 +02002366}
2367
/*
 * add to busy list of queues for service, trying to be fair in ordering
 * the pending list according to last request service
 */
static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
	/* must not already be on the RR list */
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;
	/* sync queues are counted separately */
	if (cfq_cfqq_sync(cfqq))
		cfqd->busy_sync_queues++;

	/* position the queue in the service tree */
	cfq_resort_rr_list(cfqd, cfqq);
}
2383
/*
 * Called when the cfqq no longer has requests pending, remove it from
 * the service tree.
 */
static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
	BUG_ON(!cfq_cfqq_on_rr(cfqq));
	cfq_clear_cfqq_on_rr(cfqq);

	/* unlink from the service tree, if currently linked */
	if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
		cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
		cfqq->service_tree = NULL;
	}
	/* and from the sector-sorted prio tree */
	if (cfqq->p_root) {
		rb_erase(&cfqq->p_node, cfqq->p_root);
		cfqq->p_root = NULL;
	}

	cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
	BUG_ON(!cfqd->busy_queues);
	cfqd->busy_queues--;
	if (cfq_cfqq_sync(cfqq))
		cfqd->busy_sync_queues--;
}
2409
/*
 * rb tree support functions
 */
/* remove @rq from its cfqq's per-queue sort_list */
static void cfq_del_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	const int sync = rq_is_sync(rq);

	BUG_ON(!cfqq->queued[sync]);
	cfqq->queued[sync]--;

	elv_rb_del(&cfqq->sort_list, rq);

	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
		/*
		 * Queue will be deleted from service tree when we actually
		 * expire it later. Right now just remove it from prio tree
		 * as it is empty.
		 */
		if (cfqq->p_root) {
			rb_erase(&cfqq->p_node, cfqq->p_root);
			cfqq->p_root = NULL;
		}
	}
}
2435
/*
 * Add @rq to its cfqq's sort_list, putting the queue on the RR list if
 * needed, and refresh the queue's next-to-serve request.
 */
static void cfq_add_rq_rb(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	struct request *prev;

	cfqq->queued[rq_is_sync(rq)]++;

	elv_rb_add(&cfqq->sort_list, rq);

	/* first request makes the queue busy */
	if (!cfq_cfqq_on_rr(cfqq))
		cfq_add_cfqq_rr(cfqd, cfqq);

	/*
	 * check if this request is a better next-serve candidate
	 */
	prev = cfqq->next_rq;
	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);

	/*
	 * adjust priority tree position, if ->next_rq changes
	 */
	if (prev != cfqq->next_rq)
		cfq_prio_tree_add(cfqd, cfqq);

	BUG_ON(!cfqq->next_rq);
}
2463
/*
 * Re-sort @rq within @cfqq's sort_list (e.g. after a front merge moved
 * its position); stats are removed and re-added so counts stay paired.
 */
static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
	elv_rb_del(&cfqq->sort_list, rq);
	cfqq->queued[rq_is_sync(rq)]--;
	cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
	cfq_add_rq_rb(rq);
	cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
				 rq->cmd_flags);
}
2473
Jens Axboe206dc692006-03-28 13:03:44 +02002474static struct request *
2475cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476{
Jens Axboe206dc692006-03-28 13:03:44 +02002477 struct task_struct *tsk = current;
Tejun Heoc5869802011-12-14 00:33:41 +01002478 struct cfq_io_cq *cic;
Jens Axboe206dc692006-03-28 13:03:44 +02002479 struct cfq_queue *cfqq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480
Jens Axboe4ac845a2008-01-24 08:44:49 +01002481 cic = cfq_cic_lookup(cfqd, tsk->io_context);
Vasily Tarasov91fac312007-04-25 12:29:51 +02002482 if (!cic)
2483 return NULL;
2484
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002485 cfqq = cic_to_cfqq(cic, op_is_sync(bio->bi_opf));
Kent Overstreetf73a1c72012-09-25 15:05:12 -07002486 if (cfqq)
2487 return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 return NULL;
2490}
2491
/*
 * Elevator hook: @rq has been handed to the driver.  Track the in-driver
 * count and remember where the disk head will end up.
 */
static void cfq_activate_request(struct request_queue *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	cfqd->rq_in_driver++;
	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
						cfqd->rq_in_driver);

	/* last sector the head will reach after servicing this request */
	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
}
2502
/*
 * Elevator hook: @rq was requeued from the driver back to the scheduler;
 * undo the in-driver accounting done by cfq_activate_request().
 */
static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	WARN_ON(!cfqd->rq_in_driver);
	cfqd->rq_in_driver--;
	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
						cfqd->rq_in_driver);
}
2512
/*
 * Remove @rq from the scheduler entirely: fifo list, sort_list and all
 * associated accounting.  Picks a new next_rq for the queue first if @rq
 * was it.
 */
static void cfq_remove_request(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	if (cfqq->next_rq == rq)
		cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);

	list_del_init(&rq->queuelist);
	cfq_del_rq_rb(rq);

	cfqq->cfqd->rq_queued--;
	cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
	/* keep the separate count of REQ_PRIO requests in sync */
	if (rq->cmd_flags & REQ_PRIO) {
		WARN_ON(!cfqq->prio_pending);
		cfqq->prio_pending--;
	}
}
2530
Jens Axboe165125e2007-07-24 09:28:11 +02002531static int cfq_merge(struct request_queue *q, struct request **req,
2532 struct bio *bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533{
2534 struct cfq_data *cfqd = q->elevator->elevator_data;
2535 struct request *__rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536
Jens Axboe206dc692006-03-28 13:03:44 +02002537 __rq = cfq_find_rq_fmerge(cfqd, bio);
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07002538 if (__rq && elv_bio_merge_ok(__rq, bio)) {
Jens Axboe98170642006-07-28 09:23:08 +02002539 *req = __rq;
2540 return ELEVATOR_FRONT_MERGE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 }
2542
2543 return ELEVATOR_NO_MERGE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544}
2545
Jens Axboe165125e2007-07-24 09:28:11 +02002546static void cfq_merged_request(struct request_queue *q, struct request *req,
Jens Axboe21183b02006-07-13 12:33:14 +02002547 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548{
Jens Axboe21183b02006-07-13 12:33:14 +02002549 if (type == ELEVATOR_FRONT_MERGE) {
Jens Axboe5e705372006-07-13 12:39:25 +02002550 struct cfq_queue *cfqq = RQ_CFQQ(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551
Jens Axboe5e705372006-07-13 12:39:25 +02002552 cfq_reposition_rq_rb(cfqq, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554}
2555
/* Elevator hook: a bio was merged into @req; bump the group merge stats. */
static void cfq_bio_merged(struct request_queue *q, struct request *req,
			   struct bio *bio)
{
	cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_opf);
}
2561
/*
 * Elevator hook: @next is being merged into @rq and will go away.
 * Inherit @next's fifo position/deadline when appropriate, fix up
 * next_rq, and drop @next's (possibly now empty) queue from service.
 */
static void
cfq_merged_requests(struct request_queue *q, struct request *rq,
		    struct request *next)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = q->elevator->elevator_data;

	/*
	 * reposition in fifo if next is older than rq
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
	    next->fifo_time < rq->fifo_time &&
	    cfqq == RQ_CFQQ(next)) {
		list_move(&rq->queuelist, &next->queuelist);
		rq->fifo_time = next->fifo_time;
	}

	if (cfqq->next_rq == next)
		cfqq->next_rq = rq;
	cfq_remove_request(next);
	cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);

	/* from here on, cfqq is @next's queue, not @rq's */
	cfqq = RQ_CFQQ(next);
	/*
	 * all requests of this queue are merged to other queues, delete it
	 * from the service tree. If it's the active_queue,
	 * cfq_dispatch_requests() will choose to expire it or do idle
	 */
	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
	    cfqq != cfqd->active_queue)
		cfq_del_cfqq_rr(cfqd, cfqq);
}
2594
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07002595static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
2596 struct bio *bio)
Jens Axboeda775262006-12-20 11:04:12 +01002597{
2598 struct cfq_data *cfqd = q->elevator->elevator_data;
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002599 bool is_sync = op_is_sync(bio->bi_opf);
Tejun Heoc5869802011-12-14 00:33:41 +01002600 struct cfq_io_cq *cic;
Jens Axboeda775262006-12-20 11:04:12 +01002601 struct cfq_queue *cfqq;
Jens Axboeda775262006-12-20 11:04:12 +01002602
2603 /*
Jens Axboeec8acb62007-01-02 18:32:11 +01002604 * Disallow merge of a sync bio into an async request.
Jens Axboeda775262006-12-20 11:04:12 +01002605 */
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002606 if (is_sync && !rq_is_sync(rq))
Jens Axboea6151c32009-10-07 20:02:57 +02002607 return false;
Jens Axboeda775262006-12-20 11:04:12 +01002608
2609 /*
Tejun Heof1a4f4d2011-12-14 00:33:39 +01002610 * Lookup the cfqq that this bio will be queued with and allow
Tejun Heo07c2bd32012-02-08 09:19:42 +01002611 * merge only if rq is queued there.
Jens Axboeda775262006-12-20 11:04:12 +01002612 */
Tejun Heo07c2bd32012-02-08 09:19:42 +01002613 cic = cfq_cic_lookup(cfqd, current->io_context);
2614 if (!cic)
2615 return false;
Jens Axboe719d3402006-12-22 09:38:53 +01002616
Christoph Hellwigaa39ebd2016-11-01 07:40:02 -06002617 cfqq = cic_to_cfqq(cic, is_sync);
Jens Axboea6151c32009-10-07 20:02:57 +02002618 return cfqq == RQ_CFQQ(rq);
Jens Axboeda775262006-12-20 11:04:12 +01002619}
2620
/* Requests may only be merged when they belong to the same cfq queue. */
static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
			      struct request *next)
{
	struct cfq_queue *first = RQ_CFQQ(rq);
	struct cfq_queue *second = RQ_CFQQ(next);

	return first == second;
}
2626
/*
 * Cancel a pending idle-slice hrtimer (if armed) and let the blkcg stats
 * account the time the group spent idling.
 */
static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
	cfqg_stats_update_idle_time(cfqq->cfqg);
}
2632
/*
 * Make @cfqq the active queue, resetting all of its per-slice accounting
 * so the new service slice starts from a clean state.  @cfqq may be NULL,
 * which simply clears the active queue.
 */
static void __cfq_set_active_queue(struct cfq_data *cfqd,
				   struct cfq_queue *cfqq)
{
	if (cfqq) {
		cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d",
				cfqd->serving_wl_class, cfqd->serving_wl_type);
		cfqg_stats_update_avg_queue_size(cfqq->cfqg);
		/* fresh slice: zero all slice/dispatch bookkeeping */
		cfqq->slice_start = 0;
		cfqq->dispatch_start = ktime_get_ns();
		cfqq->allocated_slice = 0;
		cfqq->slice_end = 0;
		cfqq->slice_dispatch = 0;
		cfqq->nr_sectors = 0;

		cfq_clear_cfqq_wait_request(cfqq);
		cfq_clear_cfqq_must_dispatch(cfqq);
		cfq_clear_cfqq_must_alloc_slice(cfqq);
		cfq_clear_cfqq_fifo_expire(cfqq);
		cfq_mark_cfqq_slice_new(cfqq);

		/* any pending idle timer belonged to the previous slice */
		cfq_del_timer(cfqd, cfqq);
	}

	cfqd->active_queue = cfqq;
}
2658
/*
 * current cfqq expired its slice (or was too idle), select new one.
 * Tears down the active state for @cfqq: cancels idling, records slice
 * residue, charges the group, and drops the active io_context reference.
 */
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		    bool timed_out)
{
	cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);

	if (cfq_cfqq_wait_request(cfqq))
		cfq_del_timer(cfqd, cfqq);

	cfq_clear_cfqq_wait_request(cfqq);
	cfq_clear_cfqq_wait_busy(cfqq);

	/*
	 * If this cfqq is shared between multiple processes, check to
	 * make sure that those processes are still issuing I/Os within
	 * the mean seek distance. If not, it may be time to break the
	 * queues apart again.
	 */
	if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
		cfq_mark_cfqq_split_coop(cfqq);

	/*
	 * store what was left of this slice, if the queue idled/timed out
	 */
	if (timed_out) {
		if (cfq_cfqq_slice_new(cfqq))
			cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
		else
			cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
		cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
	}

	cfq_group_served(cfqd, cfqq->cfqg, cfqq);

	/* queue went empty: take it off the round-robin service tree */
	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
		cfq_del_cfqq_rr(cfqd, cfqq);

	cfq_resort_rr_list(cfqd, cfqq);

	if (cfqq == cfqd->active_queue)
		cfqd->active_queue = NULL;

	/* drop the io_context reference pinned while the queue was active */
	if (cfqd->active_cic) {
		put_io_context(cfqd->active_cic->icq.ioc);
		cfqd->active_cic = NULL;
	}
}
2709
Vivek Goyale5ff0822010-04-26 19:25:11 +02002710static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002711{
2712 struct cfq_queue *cfqq = cfqd->active_queue;
2713
2714 if (cfqq)
Vivek Goyale5ff0822010-04-26 19:25:11 +02002715 __cfq_slice_expired(cfqd, cfqq, timed_out);
Jens Axboe7b14e3b2006-02-28 09:35:11 +01002716}
2717
Jens Axboe498d3aa22007-04-26 12:54:48 +02002718/*
2719 * Get next queue for service. Unless we have a queue preemption,
2720 * we'll simply select the first cfqq in the service tree.
2721 */
Jens Axboe6d048f52007-04-25 12:44:27 +02002722static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
Jens Axboe22e2c502005-06-27 10:55:12 +02002723{
Vivek Goyal34b98d02012-10-03 16:56:58 -04002724 struct cfq_rb_root *st = st_for(cfqd->serving_group,
2725 cfqd->serving_wl_class, cfqd->serving_wl_type);
Jens Axboeedd75ff2007-04-19 12:03:34 +02002726
Vivek Goyalf04a6422009-12-03 12:59:40 -05002727 if (!cfqd->rq_queued)
2728 return NULL;
2729
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05002730 /* There is nothing to dispatch */
Vivek Goyal34b98d02012-10-03 16:56:58 -04002731 if (!st)
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05002732 return NULL;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002733 if (RB_EMPTY_ROOT(&st->rb))
Corrado Zoccoloc0324a02009-10-27 19:16:03 +01002734 return NULL;
Vivek Goyal34b98d02012-10-03 16:56:58 -04002735 return cfq_rb_first(st);
Jens Axboe6d048f52007-04-25 12:44:27 +02002736}
2737
Vivek Goyalf04a6422009-12-03 12:59:40 -05002738static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
2739{
Vivek Goyal25fb5162009-12-03 12:59:46 -05002740 struct cfq_group *cfqg;
Vivek Goyalf04a6422009-12-03 12:59:40 -05002741 struct cfq_queue *cfqq;
2742 int i, j;
2743 struct cfq_rb_root *st;
2744
2745 if (!cfqd->rq_queued)
2746 return NULL;
2747
Vivek Goyal25fb5162009-12-03 12:59:46 -05002748 cfqg = cfq_get_next_cfqg(cfqd);
2749 if (!cfqg)
2750 return NULL;
2751
Markus Elfring1cf41752017-01-21 22:44:07 +01002752 for_each_cfqg_st(cfqg, i, j, st) {
2753 cfqq = cfq_rb_first(st);
2754 if (cfqq)
Vivek Goyalf04a6422009-12-03 12:59:40 -05002755 return cfqq;
Markus Elfring1cf41752017-01-21 22:44:07 +01002756 }
Vivek Goyalf04a6422009-12-03 12:59:40 -05002757 return NULL;
2758}
2759
/*
 * Get and set a new active queue for service.  If @cfqq is NULL the next
 * queue from the service tree is picked.
 */
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
					      struct cfq_queue *cfqq)
{
	struct cfq_queue *next = cfqq ? cfqq : cfq_get_next_queue(cfqd);

	__cfq_set_active_queue(cfqd, next);
	return next;
}
2772
Jens Axboed9e76202007-04-20 14:27:50 +02002773static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
2774 struct request *rq)
2775{
Tejun Heo83096eb2009-05-07 22:24:39 +09002776 if (blk_rq_pos(rq) >= cfqd->last_position)
2777 return blk_rq_pos(rq) - cfqd->last_position;
Jens Axboed9e76202007-04-20 14:27:50 +02002778 else
Tejun Heo83096eb2009-05-07 22:24:39 +09002779 return cfqd->last_position - blk_rq_pos(rq);
Jens Axboed9e76202007-04-20 14:27:50 +02002780}
2781
/*
 * Is @rq within CFQQ_CLOSE_THR sectors of the last dispatched position,
 * i.e. close enough to count as (nearly) sequential?
 */
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			       struct request *rq)
{
	return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
}
2787
/*
 * Search the per-priority rbtree (sorted by next-request position) for a
 * queue whose pending I/O is close to cfqd->last_position.  Returns a
 * candidate queue, or NULL if nothing is within CFQQ_CLOSE_THR sectors.
 */
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
				    struct cfq_queue *cur_cfqq)
{
	struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
	struct rb_node *parent, *node;
	struct cfq_queue *__cfqq;
	sector_t sector = cfqd->last_position;

	if (RB_EMPTY_ROOT(root))
		return NULL;

	/*
	 * First, if we find a request starting at the end of the last
	 * request, choose it.
	 */
	__cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
	if (__cfqq)
		return __cfqq;

	/*
	 * If the exact sector wasn't found, the parent of the NULL leaf
	 * will contain the closest sector.
	 */
	__cfqq = rb_entry(parent, struct cfq_queue, p_node);
	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
		return __cfqq;

	/* otherwise try the neighbour on the other side of @sector */
	if (blk_rq_pos(__cfqq->next_rq) < sector)
		node = rb_next(&__cfqq->p_node);
	else
		node = rb_prev(&__cfqq->p_node);
	if (!node)
		return NULL;

	__cfqq = rb_entry(node, struct cfq_queue, p_node);
	if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
		return __cfqq;

	return NULL;
}
2828
/*
 * Find a queue "cooperating" with @cur_cfqq -- one whose pending I/O is
 * close on disk -- so the two can be served together instead of idling.
 *
 * cfqd - obvious
 * cur_cfqq - passed in so that we don't decide that the current queue is
 * closely cooperating with itself.
 *
 * So, basically we're assuming that that cur_cfqq has dispatched at least
 * one request, and that cfqd->last_position reflects a position on the disk
 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
 * assumption.
 */
static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
					      struct cfq_queue *cur_cfqq)
{
	struct cfq_queue *cfqq;

	/* idle-class, async and seeky current queues never cooperate */
	if (cfq_class_idle(cur_cfqq))
		return NULL;
	if (!cfq_cfqq_sync(cur_cfqq))
		return NULL;
	if (CFQQ_SEEKY(cur_cfqq))
		return NULL;

	/*
	 * Don't search priority tree if it's the only queue in the group.
	 */
	if (cur_cfqq->cfqg->nr_cfqq == 1)
		return NULL;

	/*
	 * We should notice if some of the queues are cooperating, eg
	 * working closely on the same area of the disk. In that case,
	 * we can group them together and don't waste time idling.
	 */
	cfqq = cfqq_close(cfqd, cur_cfqq);
	if (!cfqq)
		return NULL;

	/* If new queue belongs to different cfq_group, don't choose it */
	if (cur_cfqq->cfqg != cfqq->cfqg)
		return NULL;

	/*
	 * It only makes sense to merge sync queues.
	 */
	if (!cfq_cfqq_sync(cfqq))
		return NULL;
	if (CFQQ_SEEKY(cfqq))
		return NULL;

	/*
	 * Do not merge queues of different priority classes
	 */
	if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
		return NULL;

	return cfqq;
}
2886
/*
 * Determine whether we should enforce idle window for this queue.
 */

static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	enum wl_class_t wl_class = cfqq_class(cfqq);
	struct cfq_rb_root *st = cfqq->service_tree;

	BUG_ON(!st);
	BUG_ON(!st->count);

	/* idling disabled globally via tunable */
	if (!cfqd->cfq_slice_idle)
		return false;

	/* We never do for idle class queues. */
	if (wl_class == IDLE_WORKLOAD)
		return false;

	/* We do for queues that were marked with idle window flag. */
	if (cfq_cfqq_idle_window(cfqq) &&
	   !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
		return true;

	/*
	 * Otherwise, we do only if they are the last ones
	 * in their service tree.
	 */
	if (st->count == 1 && cfq_cfqq_sync(cfqq) &&
	   !cfq_io_thinktime_big(cfqd, &st->ttime, false))
		return true;
	cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count);
	return false;
}
2921
/*
 * Arm the idle-slice hrtimer for the active queue: instead of switching
 * queues the moment the active one runs dry, wait briefly for the task
 * (or, with group_idle, the group) to issue more I/O.
 */
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = cfqd->active_queue;
	struct cfq_rb_root *st = cfqq->service_tree;
	struct cfq_io_cq *cic;
	u64 sl, group_idle = 0;
	u64 now = ktime_get_ns();

	/*
	 * SSD device without seek penalty, disable idling. But only do so
	 * for devices that support queuing, otherwise we still have a problem
	 * with sync vs async workloads.
	 */
	if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
		return;

	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
	WARN_ON(cfq_cfqq_slice_new(cfqq));

	/*
	 * idle is disabled, either manually or by past process history
	 */
	if (!cfq_should_idle(cfqd, cfqq)) {
		/* no queue idling. Check for group idling */
		if (cfqd->cfq_group_idle)
			group_idle = cfqd->cfq_group_idle;
		else
			return;
	}

	/*
	 * still active requests from this queue, don't idle
	 */
	if (cfqq->dispatched)
		return;

	/*
	 * task has exited, don't wait
	 */
	cic = cfqd->active_cic;
	if (!cic || !atomic_read(&cic->icq.ioc->active_ref))
		return;

	/*
	 * If our average think time is larger than the remaining time
	 * slice, then don't idle. This avoids overrunning the allotted
	 * time slice.
	 */
	if (sample_valid(cic->ttime.ttime_samples) &&
	    (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
			     cic->ttime.ttime_mean);
		return;
	}

	/*
	 * There are other queues in the group or this is the only group and
	 * it has too big thinktime, don't do group idle.
	 */
	if (group_idle &&
	    (cfqq->cfqg->nr_cfqq > 1 ||
	     cfq_io_thinktime_big(cfqd, &st->ttime, true)))
		return;

	cfq_mark_cfqq_wait_request(cfqq);

	/* group idling uses its own window length */
	if (group_idle)
		sl = cfqd->cfq_group_idle;
	else
		sl = cfqd->cfq_slice_idle;

	hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
		      HRTIMER_MODE_REL);
	cfqg_stats_set_start_idle_time(cfqq->cfqg);
	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
			group_idle ? 1 : 0);
}
2999
/*
 * Move request from internal lists to the request queue dispatch list.
 */
static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");

	/* pick a new cached next_rq before rq leaves the sort tree */
	cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
	cfq_remove_request(rq);
	cfqq->dispatched++;
	(RQ_CFQG(rq))->dispatched++;
	elv_dispatch_sort(q, rq);

	/* in-flight and sector accounting for this queue */
	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
	cfqq->nr_sectors += blk_rq_sectors(rq);
}
3019
3020/*
3021 * return expired entry, or NULL to just start from scratch in rbtree
3022 */
Jens Axboefebffd62008-01-28 13:19:43 +01003023static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003024{
Jens Axboe30996f42009-10-05 11:03:39 +02003025 struct request *rq = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026
Jens Axboe3b181522005-06-27 10:56:24 +02003027 if (cfq_cfqq_fifo_expire(cfqq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028 return NULL;
Jens Axboecb887412007-01-19 12:01:16 +11003029
3030 cfq_mark_cfqq_fifo_expire(cfqq);
3031
Jens Axboe89850f72006-07-22 16:48:31 +02003032 if (list_empty(&cfqq->fifo))
3033 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003034
Jens Axboe89850f72006-07-22 16:48:31 +02003035 rq = rq_entry_fifo(cfqq->fifo.next);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003036 if (ktime_get_ns() < rq->fifo_time)
Jens Axboe7b679132008-05-30 12:23:07 +02003037 rq = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003038
Jens Axboe6d048f52007-04-25 12:44:27 +02003039 return rq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003040}
3041
Jens Axboe22e2c502005-06-27 10:55:12 +02003042static inline int
3043cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3044{
3045 const int base_rq = cfqd->cfq_slice_async_rq;
3046
3047 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
3048
Namhyung Kimb9f8ce02011-05-24 10:23:21 +02003049 return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
Jens Axboe22e2c502005-06-27 10:55:12 +02003050}
3051
3052/*
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003053 * Must be called with the queue_lock held.
3054 */
3055static int cfqq_process_refs(struct cfq_queue *cfqq)
3056{
3057 int process_refs, io_refs;
3058
3059 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
Shaohua Li30d7b942011-01-07 08:46:59 +01003060 process_refs = cfqq->ref - io_refs;
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003061 BUG_ON(process_refs < 0);
3062 return process_refs;
3063}
3064
/*
 * Set up a merge between @cfqq and @new_cfqq: the queue with fewer
 * process references is pointed (via ->new_cfqq) at the busier one,
 * following -- but never creating -- existing merge chains.
 */
static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
{
	int process_refs, new_process_refs;
	struct cfq_queue *__cfqq;

	/*
	 * If there are no process references on the new_cfqq, then it is
	 * unsafe to follow the ->new_cfqq chain as other cfqq's in the
	 * chain may have dropped their last reference (not just their
	 * last process reference).
	 */
	if (!cfqq_process_refs(new_cfqq))
		return;

	/* Avoid a circular list and skip interim queue merges */
	while ((__cfqq = new_cfqq->new_cfqq)) {
		if (__cfqq == cfqq)
			return;
		new_cfqq = __cfqq;
	}

	process_refs = cfqq_process_refs(cfqq);
	new_process_refs = cfqq_process_refs(new_cfqq);
	/*
	 * If the process for the cfqq has gone away, there is no
	 * sense in merging the queues.
	 */
	if (process_refs == 0 || new_process_refs == 0)
		return;

	/*
	 * Merge in the direction of the lesser amount of work.
	 */
	if (new_process_refs >= process_refs) {
		cfqq->new_cfqq = new_cfqq;
		new_cfqq->ref += process_refs;
	} else {
		new_cfqq->new_cfqq = cfqq;
		cfqq->ref += new_process_refs;
	}
}
3106
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003107static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
Vivek Goyal3bf10fe2012-10-03 16:56:56 -04003108 struct cfq_group *cfqg, enum wl_class_t wl_class)
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003109{
3110 struct cfq_queue *queue;
3111 int i;
3112 bool key_valid = false;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003113 u64 lowest_key = 0;
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003114 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
3115
Vivek Goyal65b32a52009-12-16 17:52:59 -05003116 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
3117 /* select the one with lowest rb_key */
Vivek Goyal34b98d02012-10-03 16:56:58 -04003118 queue = cfq_rb_first(st_for(cfqg, wl_class, i));
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003119 if (queue &&
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003120 (!key_valid || queue->rb_key < lowest_key)) {
Corrado Zoccolo718eee02009-10-26 22:45:29 +01003121 lowest_key = queue->rb_key;
3122 cur_best = i;
3123 key_valid = true;
3124 }
3125 }
3126
3127 return cur_best;
3128}
3129
/*
 * Choose the workload class (RT > BE > IDLE) and, within it, the workload
 * type (SYNC / SYNC_NOIDLE / ASYNC) to serve next for @cfqg, and compute
 * how long that workload slice should run before expiring.
 */
static void
choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
	u64 slice;
	unsigned count;
	struct cfq_rb_root *st;
	u64 group_slice;
	enum wl_class_t original_class = cfqd->serving_wl_class;
	u64 now = ktime_get_ns();

	/* Choose next priority. RT > BE > IDLE */
	if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
		cfqd->serving_wl_class = RT_WORKLOAD;
	else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
		cfqd->serving_wl_class = BE_WORKLOAD;
	else {
		/* only idle-class queues left: minimal slice, done */
		cfqd->serving_wl_class = IDLE_WORKLOAD;
		cfqd->workload_expires = now + jiffies_to_nsecs(1);
		return;
	}

	if (original_class != cfqd->serving_wl_class)
		goto new_workload;

	/*
	 * For RT and BE, we have to choose also the type
	 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
	 * expiration time
	 */
	st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
	count = st->count;

	/*
	 * check workload expiration, and that we still have other queues ready
	 */
	if (count && !(now > cfqd->workload_expires))
		return;

new_workload:
	/* otherwise select new workload type */
	cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg,
					cfqd->serving_wl_class);
	st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
	count = st->count;

	/*
	 * the workload slice is computed as a fraction of target latency
	 * proportional to the number of queues in that workload, over
	 * all the queues in the same priority class
	 */
	group_slice = cfq_group_slice(cfqd, cfqg);

	slice = div_u64(group_slice * count,
		max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
		      cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
					cfqg)));

	if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
		u64 tmp;

		/*
		 * Async queues are currently system wide. Just taking
		 * proportion of queues with-in same group will lead to higher
		 * async ratio system wide as generally root group is going
		 * to have higher weight. A more accurate thing would be to
		 * calculate system wide asnc/sync ratio.
		 */
		tmp = cfqd->cfq_target_latency *
			cfqg_busy_async_queues(cfqd, cfqg);
		tmp = div_u64(tmp, cfqd->busy_queues);
		slice = min_t(u64, slice, tmp);

		/* async workload slice is scaled down according to
		 * the sync/async slice ratio. */
		slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
	} else
		/* sync workload slice is at least 2 * cfq_slice_idle */
		slice = max(slice, 2 * cfqd->cfq_slice_idle);

	slice = max_t(u64, slice, CFQ_MIN_TT);
	cfq_log(cfqd, "workload slice:%llu", slice);
	cfqd->workload_expires = now + slice;
}
3213
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003214static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
3215{
3216 struct cfq_rb_root *st = &cfqd->grp_service_tree;
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003217 struct cfq_group *cfqg;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003218
3219 if (RB_EMPTY_ROOT(&st->rb))
3220 return NULL;
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003221 cfqg = cfq_rb_first_group(st);
Vivek Goyal25bc6b02009-12-03 12:59:43 -05003222 update_min_vdisktime(st);
3223 return cfqg;
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003224}
3225
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003226static void cfq_choose_cfqg(struct cfq_data *cfqd)
3227{
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003228 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003229 u64 now = ktime_get_ns();
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05003230
3231 cfqd->serving_group = cfqg;
Vivek Goyaldae739e2009-12-03 12:59:45 -05003232
3233 /* Restore the workload type data */
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003234 if (cfqg->saved_wl_slice) {
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003235 cfqd->workload_expires = now + cfqg->saved_wl_slice;
Vivek Goyal4d2ceea2012-10-03 16:56:57 -04003236 cfqd->serving_wl_type = cfqg->saved_wl_type;
3237 cfqd->serving_wl_class = cfqg->saved_wl_class;
Gui Jianfeng66ae2912009-12-15 10:08:45 +01003238 } else
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003239 cfqd->workload_expires = now - 1;
Gui Jianfeng66ae2912009-12-15 10:08:45 +01003240
Vivek Goyal6d816ec2012-10-03 16:56:59 -04003241 choose_wl_class_and_type(cfqd, cfqg);
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003242}
3243
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003244/*
Jens Axboe498d3aa22007-04-26 12:54:48 +02003245 * Select a queue for service. If we have a current active queue,
3246 * check whether to continue servicing it, or retrieve and set a new one.
Jens Axboe22e2c502005-06-27 10:55:12 +02003247 */
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq, *new_cfqq = NULL;
	u64 now = ktime_get_ns();

	/* no active queue: go pick a fresh one */
	cfqq = cfqd->active_queue;
	if (!cfqq)
		goto new_queue;

	/* no requests queued anywhere, nothing to serve */
	if (!cfqd->rq_queued)
		return NULL;

	/*
	 * We were waiting for group to get backlogged. Expire the queue
	 */
	if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
		goto expire;

	/*
	 * The active queue has run out of time, expire it and select new.
	 */
	if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
		/*
		 * If slice had not expired at the completion of last request
		 * we might not have turned on wait_busy flag. Don't expire
		 * the queue yet. Allow the group to get backlogged.
		 *
		 * The very fact that we have used the slice, that means we
		 * have been idling all along on this queue and it should be
		 * ok to wait for this request to complete.
		 */
		if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
		    && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
			cfqq = NULL;
			goto keep_queue;
		} else
			goto check_group_idle;
	}

	/*
	 * The active queue has requests and isn't expired, allow it to
	 * dispatch.
	 */
	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
		goto keep_queue;

	/*
	 * If another queue has a request waiting within our mean seek
	 * distance, let it run. The expire code will check for close
	 * cooperators and put the close queue at the front of the service
	 * tree. If possible, merge the expiring queue with the new cfqq.
	 */
	new_cfqq = cfq_close_cooperator(cfqd, cfqq);
	if (new_cfqq) {
		if (!cfqq->new_cfqq)
			cfq_setup_merge(cfqq, new_cfqq);
		goto expire;
	}

	/*
	 * No requests pending. If the active queue still has requests in
	 * flight or is idling for a new request, allow either of these
	 * conditions to happen (or time out) before selecting a new queue.
	 */
	if (hrtimer_active(&cfqd->idle_slice_timer)) {
		cfqq = NULL;
		goto keep_queue;
	}

	/*
	 * This is a deep seek queue, but the device is much faster than
	 * the queue can deliver, don't idle
	 */
	if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
	    (cfq_cfqq_slice_new(cfqq) ||
	     (cfqq->slice_end - now > now - cfqq->slice_start))) {
		cfq_clear_cfqq_deep(cfqq);
		cfq_clear_cfqq_idle_window(cfqq);
	}

	/* still have dispatched requests and should idle: keep waiting */
	if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
		cfqq = NULL;
		goto keep_queue;
	}

	/*
	 * If group idle is enabled and there are requests dispatched from
	 * this group, wait for requests to complete.
	 */
check_group_idle:
	if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
	    cfqq->cfqg->dispatched &&
	    !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
		cfqq = NULL;
		goto keep_queue;
	}

expire:
	cfq_slice_expired(cfqd, 0);
new_queue:
	/*
	 * Current queue expired. Check if we have to switch to a new
	 * service tree
	 */
	if (!new_cfqq)
		cfq_choose_cfqg(cfqd);

	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
keep_queue:
	return cfqq;
}
3359
Jens Axboefebffd62008-01-28 13:19:43 +01003360static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
Jens Axboed9e76202007-04-20 14:27:50 +02003361{
3362 int dispatched = 0;
3363
3364 while (cfqq->next_rq) {
3365 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
3366 dispatched++;
3367 }
3368
3369 BUG_ON(!list_empty(&cfqq->fifo));
Vivek Goyalf04a6422009-12-03 12:59:40 -05003370
3371 /* By default cfqq is not expired if it is empty. Do it explicitly */
Vivek Goyale5ff0822010-04-26 19:25:11 +02003372 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
Jens Axboed9e76202007-04-20 14:27:50 +02003373 return dispatched;
3374}
3375
Jens Axboe498d3aa22007-04-26 12:54:48 +02003376/*
3377 * Drain our current requests. Used for barriers and when switching
3378 * io schedulers on-the-fly.
3379 */
Jens Axboed9e76202007-04-20 14:27:50 +02003380static int cfq_forced_dispatch(struct cfq_data *cfqd)
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003381{
Jens Axboe08717142008-01-28 11:38:15 +01003382 struct cfq_queue *cfqq;
Jens Axboed9e76202007-04-20 14:27:50 +02003383 int dispatched = 0;
Vivek Goyalcdb16e82009-12-03 12:59:38 -05003384
Divyesh Shah3440c492010-04-09 09:29:57 +02003385 /* Expire the timeslice of the current active queue first */
Vivek Goyale5ff0822010-04-26 19:25:11 +02003386 cfq_slice_expired(cfqd, 0);
Divyesh Shah3440c492010-04-09 09:29:57 +02003387 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
3388 __cfq_set_active_queue(cfqd, cfqq);
Vivek Goyalf04a6422009-12-03 12:59:40 -05003389 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
Divyesh Shah3440c492010-04-09 09:29:57 +02003390 }
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003391
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003392 BUG_ON(cfqd->busy_queues);
3393
Jeff Moyer6923715a2009-06-12 15:29:30 +02003394 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
Tejun Heo1b5ed5e12005-11-10 08:49:19 +01003395 return dispatched;
3396}
3397
Shaohua Liabc3c742010-03-01 09:20:54 +01003398static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
3399 struct cfq_queue *cfqq)
3400{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003401 u64 now = ktime_get_ns();
3402
Shaohua Liabc3c742010-03-01 09:20:54 +01003403 /* the queue hasn't finished any request, can't estimate */
3404 if (cfq_cfqq_slice_new(cfqq))
Shaohua Lic1e44752010-11-08 15:01:02 +01003405 return true;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003406 if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
Shaohua Lic1e44752010-11-08 15:01:02 +01003407 return true;
Shaohua Liabc3c742010-03-01 09:20:54 +01003408
Shaohua Lic1e44752010-11-08 15:01:02 +01003409 return false;
Shaohua Liabc3c742010-03-01 09:20:54 +01003410}
3411
/*
 * Decide whether @cfqq may dispatch another request right now, based on
 * its scheduling class, how many requests it already has in flight, and
 * the global sync/async balance. Returns true when a dispatch is
 * allowed.
 */
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	unsigned int max_dispatch;

	/* a queue with an expired fifo request bypasses all limits */
	if (cfq_cfqq_must_dispatch(cfqq))
		return true;

	/*
	 * Drain async requests before we start sync IO
	 */
	if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
		return false;

	/*
	 * If this is an async queue and we have sync IO in flight, let it wait
	 */
	if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
		return false;

	max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
	if (cfq_class_idle(cfqq))
		max_dispatch = 1;

	/*
	 * Does this cfqq already have too much IO in flight?
	 */
	if (cfqq->dispatched >= max_dispatch) {
		bool promote_sync = false;
		/*
		 * idle queue must always only have a single IO in flight
		 */
		if (cfq_class_idle(cfqq))
			return false;

		/*
		 * If there is only one sync queue
		 * we can ignore async queue here and give the sync
		 * queue no dispatch limit. The reason is a sync queue can
		 * preempt async queue, limiting the sync queue doesn't make
		 * sense. This is useful for aiostress test.
		 */
		if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
			promote_sync = true;

		/*
		 * We have other queues, don't allow more IO from this one
		 */
		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
		    !promote_sync)
			return false;

		/*
		 * Sole queue user, no limit
		 */
		if (cfqd->busy_queues == 1 || promote_sync)
			/* -1 wraps to UINT_MAX: effectively unlimited */
			max_dispatch = -1;
		else
			/*
			 * Normally we start throttling cfqq when cfq_quantum/2
			 * requests have been dispatched. But we can drive
			 * deeper queue depths at the beginning of slice
			 * subjected to upper limit of cfq_quantum.
			 * */
			max_dispatch = cfqd->cfq_quantum;
	}

	/*
	 * Async queues must wait a bit before being allowed dispatch.
	 * We also ramp up the dispatch depth gradually for async IO,
	 * based on the last sync IO we serviced
	 */
	if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
		u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
		unsigned int depth;

		depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
		if (!depth && !cfqq->dispatched)
			depth = 1;
		if (depth < max_dispatch)
			max_dispatch = depth;
	}

	/*
	 * If we're below the current max, allow a dispatch
	 */
	return cfqq->dispatched < max_dispatch;
}
3499
3500/*
3501 * Dispatch a request from cfqq, moving them to the request queue
3502 * dispatch list.
3503 */
/*
 * Try to move one request from @cfqq to the driver's dispatch list.
 * Returns true if a request was dispatched.
 */
static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	struct request *rq;

	BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));

	/*
	 * Check the fifo for an expired request first; marking the queue
	 * must_dispatch lets cfq_may_dispatch() below wave it through.
	 */
	rq = cfq_check_fifo(cfqq);
	if (rq)
		cfq_mark_cfqq_must_dispatch(cfqq);

	if (!cfq_may_dispatch(cfqd, cfqq))
		return false;

	/*
	 * follow expired path, else get first next available
	 */
	if (!rq)
		rq = cfqq->next_rq;
	else
		cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);

	/*
	 * insert request into driver dispatch list
	 */
	cfq_dispatch_insert(cfqd->queue, rq);

	if (!cfqd->active_cic) {
		struct cfq_io_cq *cic = RQ_CIC(rq);

		/* pin the io context of the first dispatched request */
		atomic_long_inc(&cic->icq.ioc->refcount);
		cfqd->active_cic = cic;
	}

	return true;
}
3539
3540/*
3541 * Find the cfqq that we need to service and move a request from that to the
3542 * dispatch list
3543 */
static int cfq_dispatch_requests(struct request_queue *q, int force)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq;

	/* nothing to do when no queue has requests */
	if (!cfqd->busy_queues)
		return 0;

	/* forced mode drains every queue (barriers, elevator switch) */
	if (unlikely(force))
		return cfq_forced_dispatch(cfqd);

	cfqq = cfq_select_queue(cfqd);
	if (!cfqq)
		return 0;

	/*
	 * Dispatch a request from this cfqq, if it is allowed
	 */
	if (!cfq_dispatch_request(cfqd, cfqq))
		return 0;

	/* account the dispatch against this queue's slice */
	cfqq->slice_dispatch++;
	cfq_clear_cfqq_must_dispatch(cfqq);

	/*
	 * expire an async queue immediately if it has used up its slice. idle
	 * queue always expire after 1 dispatch round.
	 */
	if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
	    cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
	    cfq_class_idle(cfqq))) {
		cfqq->slice_end = ktime_get_ns() + 1;
		cfq_slice_expired(cfqd, 0);
	}

	cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
	return 1;
}
3582
Linus Torvalds1da177e2005-04-16 15:20:36 -07003583/*
Jens Axboe5e705372006-07-13 12:39:25 +02003584 * task holds one reference to the queue, dropped when task exits. each rq
3585 * in-flight on this queue also holds a reference, dropped when rq is freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003586 *
Vivek Goyalb1c35762009-12-03 12:59:47 -05003587 * Each cfq queue took a reference on the parent group. Drop it now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003588 * queue lock must be held here.
3589 */
/*
 * Drop one reference to @cfqq. When the last reference goes away the
 * queue must be idle (no sorted requests, no allocated requests, not on
 * the round-robin list) and is freed; the reference the queue held on
 * its parent group is dropped as well.
 */
static void cfq_put_queue(struct cfq_queue *cfqq)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct cfq_group *cfqg;

	BUG_ON(cfqq->ref <= 0);

	cfqq->ref--;
	if (cfqq->ref)
		return;

	cfq_log_cfqq(cfqd, cfqq, "put_queue");
	BUG_ON(rb_first(&cfqq->sort_list));
	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
	/* grab the group pointer before the queue is freed below */
	cfqg = cfqq->cfqg;

	if (unlikely(cfqd->active_queue == cfqq)) {
		__cfq_slice_expired(cfqd, cfqq, 0);
		cfq_schedule_dispatch(cfqd);
	}

	BUG_ON(cfq_cfqq_on_rr(cfqq));
	kmem_cache_free(cfq_pool, cfqq);
	cfqg_put(cfqg);
}
3615
Shaohua Lid02a2c02010-05-25 10:16:53 +02003616static void cfq_put_cooperator(struct cfq_queue *cfqq)
Jens Axboe89850f72006-07-22 16:48:31 +02003617{
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003618 struct cfq_queue *__cfqq, *next;
3619
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003620 /*
3621 * If this queue was scheduled to merge with another queue, be
3622 * sure to drop the reference taken on that queue (and others in
3623 * the merge chain). See cfq_setup_merge and cfq_merge_cfqqs.
3624 */
3625 __cfqq = cfqq->new_cfqq;
3626 while (__cfqq) {
3627 if (__cfqq == cfqq) {
3628 WARN(1, "cfqq->new_cfqq loop detected\n");
3629 break;
3630 }
3631 next = __cfqq->new_cfqq;
3632 cfq_put_queue(__cfqq);
3633 __cfqq = next;
3634 }
Shaohua Lid02a2c02010-05-25 10:16:53 +02003635}
3636
3637static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3638{
3639 if (unlikely(cfqq == cfqd->active_queue)) {
3640 __cfq_slice_expired(cfqd, cfqq, 0);
3641 cfq_schedule_dispatch(cfqd);
3642 }
3643
3644 cfq_put_cooperator(cfqq);
Jeff Moyerdf5fe3e2009-10-23 17:14:50 -04003645
Jens Axboe89850f72006-07-22 16:48:31 +02003646 cfq_put_queue(cfqq);
3647}
3648
Tejun Heo9b84cac2011-12-14 00:33:42 +01003649static void cfq_init_icq(struct io_cq *icq)
3650{
3651 struct cfq_io_cq *cic = icq_to_cic(icq);
3652
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003653 cic->ttime.last_end_request = ktime_get_ns();
Tejun Heo9b84cac2011-12-14 00:33:42 +01003654}
3655
Tejun Heoc5869802011-12-14 00:33:41 +01003656static void cfq_exit_icq(struct io_cq *icq)
Jens Axboe89850f72006-07-22 16:48:31 +02003657{
Tejun Heoc5869802011-12-14 00:33:41 +01003658 struct cfq_io_cq *cic = icq_to_cic(icq);
Tejun Heo283287a2011-12-14 00:33:38 +01003659 struct cfq_data *cfqd = cic_to_cfqd(cic);
Fabio Checconi4faa3c82008-04-10 08:28:01 +02003660
Tejun Heo563180a2015-08-18 14:55:00 -07003661 if (cic_to_cfqq(cic, false)) {
3662 cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, false));
3663 cic_set_cfqq(cic, NULL, false);
Jens Axboe89850f72006-07-22 16:48:31 +02003664 }
3665
Tejun Heo563180a2015-08-18 14:55:00 -07003666 if (cic_to_cfqq(cic, true)) {
3667 cfq_exit_cfqq(cfqd, cic_to_cfqq(cic, true));
3668 cic_set_cfqq(cic, NULL, true);
Jens Axboe89850f72006-07-22 16:48:31 +02003669 }
Jens Axboe89850f72006-07-22 16:48:31 +02003670}
3671
/*
 * (Re)compute @cfqq's effective io priority and class from the io
 * context @cic. Only does work when the prio_changed flag is set; the
 * result is also saved as the "original" priority so it can be
 * restored after a temporary elevation.
 */
static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
{
	struct task_struct *tsk = current;
	int ioprio_class;

	if (!cfq_cfqq_prio_changed(cfqq))
		return;

	ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
	switch (ioprio_class) {
	default:
		printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
		/* fall through: treat an unknown class like no class */
	case IOPRIO_CLASS_NONE:
		/*
		 * no prio set, inherit CPU scheduling settings
		 */
		cfqq->ioprio = task_nice_ioprio(tsk);
		cfqq->ioprio_class = task_nice_ioclass(tsk);
		break;
	case IOPRIO_CLASS_RT:
		cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
		cfqq->ioprio_class = IOPRIO_CLASS_RT;
		break;
	case IOPRIO_CLASS_BE:
		cfqq->ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
		cfqq->ioprio_class = IOPRIO_CLASS_BE;
		break;
	case IOPRIO_CLASS_IDLE:
		cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
		cfqq->ioprio = 7;
		cfq_clear_cfqq_idle_window(cfqq);
		break;
	}

	/*
	 * keep track of original prio settings in case we have to temporarily
	 * elevate the priority of this queue
	 */
	cfqq->org_ioprio = cfqq->ioprio;
	cfqq->org_ioprio_class = cfqq->ioprio_class;
	cfq_clear_cfqq_prio_changed(cfqq);
}
3714
Tejun Heo598971b2012-03-19 15:10:58 -07003715static void check_ioprio_changed(struct cfq_io_cq *cic, struct bio *bio)
Jens Axboe22e2c502005-06-27 10:55:12 +02003716{
Tejun Heo598971b2012-03-19 15:10:58 -07003717 int ioprio = cic->icq.ioc->ioprio;
Konstantin Khlebnikovbca4b912010-05-20 23:21:34 +04003718 struct cfq_data *cfqd = cic_to_cfqd(cic);
Al Viro478a82b2006-03-18 13:25:24 -05003719 struct cfq_queue *cfqq;
Jens Axboe35e60772006-06-14 09:10:45 +02003720
Tejun Heo598971b2012-03-19 15:10:58 -07003721 /*
3722 * Check whether ioprio has changed. The condition may trigger
3723 * spuriously on a newly created cic but there's no harm.
3724 */
3725 if (unlikely(!cfqd) || likely(cic->ioprio == ioprio))
Jens Axboecaaa5f92006-06-16 11:23:00 +02003726 return;
3727
Tejun Heo563180a2015-08-18 14:55:00 -07003728 cfqq = cic_to_cfqq(cic, false);
Jens Axboecaaa5f92006-06-16 11:23:00 +02003729 if (cfqq) {
Tejun Heo563180a2015-08-18 14:55:00 -07003730 cfq_put_queue(cfqq);
Tejun Heo2da8de02015-08-18 14:55:02 -07003731 cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, bio);
Tejun Heo563180a2015-08-18 14:55:00 -07003732 cic_set_cfqq(cic, cfqq, false);
Jens Axboe22e2c502005-06-27 10:55:12 +02003733 }
Jens Axboecaaa5f92006-06-16 11:23:00 +02003734
Tejun Heo563180a2015-08-18 14:55:00 -07003735 cfqq = cic_to_cfqq(cic, true);
Jens Axboecaaa5f92006-06-16 11:23:00 +02003736 if (cfqq)
3737 cfq_mark_cfqq_prio_changed(cfqq);
Tejun Heo598971b2012-03-19 15:10:58 -07003738
3739 cic->ioprio = ioprio;
Jens Axboe22e2c502005-06-27 10:55:12 +02003740}
3741
Jens Axboed5036d72009-06-26 10:44:34 +02003742static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Jens Axboea6151c32009-10-07 20:02:57 +02003743 pid_t pid, bool is_sync)
Jens Axboed5036d72009-06-26 10:44:34 +02003744{
3745 RB_CLEAR_NODE(&cfqq->rb_node);
3746 RB_CLEAR_NODE(&cfqq->p_node);
3747 INIT_LIST_HEAD(&cfqq->fifo);
3748
Shaohua Li30d7b942011-01-07 08:46:59 +01003749 cfqq->ref = 0;
Jens Axboed5036d72009-06-26 10:44:34 +02003750 cfqq->cfqd = cfqd;
3751
3752 cfq_mark_cfqq_prio_changed(cfqq);
3753
3754 if (is_sync) {
3755 if (!cfq_class_idle(cfqq))
3756 cfq_mark_cfqq_idle_window(cfqq);
3757 cfq_mark_cfqq_sync(cfqq);
3758 }
3759 cfqq->pid = pid;
3760}
3761
Vivek Goyal246103332009-12-03 12:59:51 -05003762#ifdef CONFIG_CFQ_GROUP_IOSCHED
/*
 * Detect whether the bio's blkcg differs from the one this io context
 * last used, by comparing the cgroup css serial number cached in the
 * cic. On a change: disable wbt for non-root cgroups (the cgroup layer
 * then does the write throttling) and drop both cached queues so new
 * ones are created in the new group when fresh requests arrive.
 */
static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
{
	struct cfq_data *cfqd = cic_to_cfqd(cic);
	struct cfq_queue *cfqq;
	uint64_t serial_nr;
	bool nonroot_cg;

	/* sample the bio's blkcg state under RCU protection */
	rcu_read_lock();
	serial_nr = bio_blkcg(bio)->css.serial_nr;
	nonroot_cg = bio_blkcg(bio) != &blkcg_root;
	rcu_read_unlock();

	/*
	 * Check whether blkcg has changed. The condition may trigger
	 * spuriously on a newly created cic but there's no harm.
	 */
	if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
		return;

	/*
	 * If we have a non-root cgroup, we can depend on that to
	 * do proper throttling of writes. Turn off wbt for that
	 * case, if it was enabled by default.
	 */
	if (nonroot_cg)
		wbt_disable_default(cfqd->queue);

	/*
	 * Drop reference to queues. New queues will be assigned in new
	 * group upon arrival of fresh requests.
	 */
	cfqq = cic_to_cfqq(cic, false);
	if (cfqq) {
		cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
		cic_set_cfqq(cic, NULL, false);
		cfq_put_queue(cfqq);
	}

	cfqq = cic_to_cfqq(cic, true);
	if (cfqq) {
		cfq_log_cfqq(cfqd, cfqq, "changed cgroup");
		cic_set_cfqq(cic, NULL, true);
		cfq_put_queue(cfqq);
	}

	cic->blkcg_serial_nr = serial_nr;
}
Tejun Heo598971b2012-03-19 15:10:58 -07003810#else
3811static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
Vivek Goyal246103332009-12-03 12:59:51 -05003812#endif /* CONFIG_CFQ_GROUP_IOSCHED */
3813
/*
 * Return the slot in @cfqg holding the shared async queue for the given
 * (class, prio) pair. IOPRIO_CLASS_NONE is mapped to best-effort at
 * normal priority; all idle-class io shares a single queue.
 */
static struct cfq_queue **
cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
{
	switch (ioprio_class) {
	case IOPRIO_CLASS_RT:
		return &cfqg->async_cfqq[0][ioprio];
	case IOPRIO_CLASS_NONE:
		ioprio = IOPRIO_NORM;
		/* fall through */
	case IOPRIO_CLASS_BE:
		return &cfqg->async_cfqq[1][ioprio];
	case IOPRIO_CLASS_IDLE:
		return &cfqg->async_idle_cfqq;
	default:
		BUG();
	}
}
3831
/*
 * Look up (for async) or allocate a cfq_queue for this request. Falls
 * back to the per-device oom_cfqq when the group lookup or the
 * allocation fails. The queue is returned with a reference taken for
 * the caller; a newly created async queue takes an extra reference for
 * the group's cache slot.
 */
static struct cfq_queue *
cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
	      struct bio *bio)
{
	int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
	int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
	struct cfq_queue **async_cfqq = NULL;
	struct cfq_queue *cfqq;
	struct cfq_group *cfqg;

	rcu_read_lock();
	cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
	if (!cfqg) {
		cfqq = &cfqd->oom_cfqq;
		goto out;
	}

	if (!is_sync) {
		/* no valid ioprio on the cic: derive it from the task */
		if (!ioprio_valid(cic->ioprio)) {
			struct task_struct *tsk = current;
			ioprio = task_nice_ioprio(tsk);
			ioprio_class = task_nice_ioclass(tsk);
		}
		/* async queues are shared per (group, class, prio) */
		async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
		cfqq = *async_cfqq;
		if (cfqq)
			goto out;
	}

	cfqq = kmem_cache_alloc_node(cfq_pool,
				     GFP_NOWAIT | __GFP_ZERO | __GFP_NOWARN,
				     cfqd->queue->node);
	if (!cfqq) {
		cfqq = &cfqd->oom_cfqq;
		goto out;
	}

	/* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
	cfqq->ioprio_class = IOPRIO_CLASS_NONE;
	cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
	cfq_init_prio_data(cfqq, cic);
	cfq_link_cfqq_cfqg(cfqq, cfqg);
	cfq_log_cfqq(cfqd, cfqq, "alloced");

	if (async_cfqq) {
		/* a new async queue is created, pin and remember */
		cfqq->ref++;
		*async_cfqq = cfqq;
	}
out:
	/* reference for the caller */
	cfqq->ref++;
	rcu_read_unlock();
	return cfqq;
}
3886
Jens Axboe22e2c502005-06-27 10:55:12 +02003887static void
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003888__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
Jens Axboe22e2c502005-06-27 10:55:12 +02003889{
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003890 u64 elapsed = ktime_get_ns() - ttime->last_end_request;
Shaohua Li383cd722011-07-12 14:24:35 +02003891 elapsed = min(elapsed, 2UL * slice_idle);
Jens Axboe22e2c502005-06-27 10:55:12 +02003892
Shaohua Li383cd722011-07-12 14:24:35 +02003893 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06003894 ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
3895 ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
3896 ttime->ttime_samples);
Shaohua Li383cd722011-07-12 14:24:35 +02003897}
3898
/*
 * Refresh every think-time statistic a request from @cfqq contributes to:
 * the per-process context (cic) and the service tree the queue sits on
 * (sync queues only), plus the cfq_group when group scheduling is built in.
 */
static void
cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			struct cfq_io_cq *cic)
{
	if (cfq_cfqq_sync(cfqq)) {
		__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
		__cfq_update_io_thinktime(&cfqq->service_tree->ttime,
			cfqd->cfq_slice_idle);
	}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	/* group-level idling uses its own tunable idle window */
	__cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
#endif
}
3912
Jens Axboe206dc692006-03-28 13:03:44 +02003913static void
Jeff Moyerb2c18e12009-10-23 17:14:49 -04003914cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
Jens Axboe6d048f52007-04-25 12:44:27 +02003915 struct request *rq)
Jens Axboe206dc692006-03-28 13:03:44 +02003916{
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003917 sector_t sdist = 0;
Corrado Zoccolo41647e72010-02-27 19:45:40 +01003918 sector_t n_sec = blk_rq_sectors(rq);
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003919 if (cfqq->last_request_pos) {
3920 if (cfqq->last_request_pos < blk_rq_pos(rq))
3921 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
3922 else
3923 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
3924 }
Jens Axboe206dc692006-03-28 13:03:44 +02003925
Corrado Zoccolo3dde36d2010-02-27 19:45:39 +01003926 cfqq->seek_history <<= 1;
Corrado Zoccolo41647e72010-02-27 19:45:40 +01003927 if (blk_queue_nonrot(cfqd->queue))
3928 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
3929 else
3930 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
Jens Axboe206dc692006-03-28 13:03:44 +02003931}
Jens Axboe22e2c502005-06-27 10:55:12 +02003932
Christoph Hellwiga2b80962016-11-01 07:40:09 -06003933static inline bool req_noidle(struct request *req)
3934{
3935 return req_op(req) == REQ_OP_WRITE &&
3936 (req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC;
3937}
3938
/*
 * Disable idle window if the process thinks too long or seeks so much that
 * it doesn't matter
 */
static void
cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		       struct cfq_io_cq *cic)
{
	int old_idle, enable_idle;

	/*
	 * Don't idle for async or idle io prio class
	 */
	if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
		return;

	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);

	/* queues with several requests pending are considered "deep" */
	if (cfqq->queued[0] + cfqq->queued[1] >= 4)
		cfq_mark_cfqq_deep(cfqq);

	/*
	 * No idling when the next request explicitly opted out of it,
	 * when the process has no active io_context users, when idling
	 * is disabled entirely, or for shallow seeky queues.
	 */
	if (cfqq->next_rq && req_noidle(cfqq->next_rq))
		enable_idle = 0;
	else if (!atomic_read(&cic->icq.ioc->active_ref) ||
		 !cfqd->cfq_slice_idle ||
		 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
		enable_idle = 0;
	else if (sample_valid(cic->ttime.ttime_samples)) {
		/* think time longer than the idle window defeats idling */
		if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
			enable_idle = 0;
		else
			enable_idle = 1;
	}

	/* only touch the flag (and log) when the decision changed */
	if (old_idle != enable_idle) {
		cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
		if (enable_idle)
			cfq_mark_cfqq_idle_window(cfqq);
		else
			cfq_clear_cfqq_idle_window(cfqq);
	}
}
3981
/*
 * Check if new_cfqq should preempt the currently active queue. Return 0 for
 * no or if we aren't sure, a 1 will cause a preempt.
 *
 * The checks form an ordered ladder: class rules first (idle/RT), then
 * sync-vs-async, cgroup fairness, slice state, and finally locality.
 */
static bool
cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
		   struct request *rq)
{
	struct cfq_queue *cfqq;

	cfqq = cfqd->active_queue;
	if (!cfqq)
		return false;

	/* idle class never preempts anyone ... */
	if (cfq_class_idle(new_cfqq))
		return false;

	/* ... and is always preempted */
	if (cfq_class_idle(cfqq))
		return true;

	/*
	 * Don't allow a non-RT request to preempt an ongoing RT cfqq timeslice.
	 */
	if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
		return false;

	/*
	 * if the new request is sync, but the currently running queue is
	 * not, let the sync request have priority.
	 */
	if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
		return true;

	/*
	 * Treat ancestors of current cgroup the same way as current cgroup.
	 * For anybody else we disallow preemption to guarantee service
	 * fairness among cgroups.
	 */
	if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
		return false;

	if (cfq_slice_used(cfqq))
		return true;

	/*
	 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
	 */
	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
		return true;

	WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
	/* Allow preemption only if we are idling on sync-noidle tree */
	if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
	    cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
	    RB_EMPTY_ROOT(&cfqq->sort_list))
		return true;

	/*
	 * So both queues are sync. Let the new request get disk time if
	 * it's a metadata request and the current queue is doing regular IO.
	 */
	if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
		return true;

	/* An idle queue should not be idle now for some reason */
	if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
		return true;

	if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
		return false;

	/*
	 * if this request is as-good as one we would expect from the
	 * current cfqq, let it preempt
	 */
	if (cfq_rq_close(cfqd, cfqq, rq))
		return true;

	return false;
}
4062
/*
 * cfqq preempts the active queue. if we allowed preempt with no slice left,
 * let it have half of its nominal slice.
 */
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	/* capture the outgoing workload type before expiring the slice */
	enum wl_type_t old_type = cfqq_type(cfqd->active_queue);

	cfq_log_cfqq(cfqd, cfqq, "preempt");
	cfq_slice_expired(cfqd, 1);

	/*
	 * workload type is changed, don't save slice, otherwise preempt
	 * doesn't happen
	 */
	if (old_type != cfqq_type(cfqq))
		cfqq->cfqg->saved_wl_slice = 0;

	/*
	 * Put the new queue at the front of the of the current list,
	 * so we know that it will be selected next.
	 */
	BUG_ON(!cfq_cfqq_on_rr(cfqq));

	cfq_service_tree_add(cfqd, cfqq, 1);

	/* start a fresh slice for the preempting queue */
	cfqq->slice_end = 0;
	cfq_mark_cfqq_slice_new(cfqq);
}
4092
/*
 * Called when a new fs request (rq) is added (to cfqq). Check if there's
 * something we should do about it
 */
static void
cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		struct request *rq)
{
	struct cfq_io_cq *cic = RQ_CIC(rq);

	cfqd->rq_queued++;
	if (rq->cmd_flags & REQ_PRIO)
		cfqq->prio_pending++;

	/* refresh the heuristics this request contributes to */
	cfq_update_io_thinktime(cfqd, cfqq, cic);
	cfq_update_io_seektime(cfqd, cfqq, rq);
	cfq_update_idle_window(cfqd, cfqq, cic);

	/* remember where this request ends, for the next seek estimate */
	cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);

	if (cfqq == cfqd->active_queue) {
		/*
		 * Remember that we saw a request from this process, but
		 * don't start queuing just yet. Otherwise we risk seeing lots
		 * of tiny requests, because we disrupt the normal plugging
		 * and merging. If the request is already larger than a single
		 * page, let it rip immediately. For that case we assume that
		 * merging is already done. Ditto for a busy system that
		 * has other work pending, don't risk delaying until the
		 * idle timer unplug to continue working.
		 */
		if (cfq_cfqq_wait_request(cfqq)) {
			if (blk_rq_bytes(rq) > PAGE_SIZE ||
			    cfqd->busy_queues > 1) {
				cfq_del_timer(cfqd, cfqq);
				cfq_clear_cfqq_wait_request(cfqq);
				__blk_run_queue(cfqd->queue);
			} else {
				cfqg_stats_update_idle_time(cfqq->cfqg);
				cfq_mark_cfqq_must_dispatch(cfqq);
			}
		}
	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
		/*
		 * not the active queue - expire current slice if it is
		 * idle and has expired it's mean thinktime or this new queue
		 * has some old slice time left and is of higher priority or
		 * this new queue is RT and the current one is BE
		 */
		cfq_preempt_queue(cfqd, cfqq);
		__blk_run_queue(cfqd->queue);
	}
}
4146
/*
 * Elevator insert hook: stamp the request's fifo deadline, add it to the
 * queue's fifo list and sorted tree, account it for cgroup stats, and let
 * cfq_rq_enqueued() decide whether to dispatch or preempt.
 */
static void cfq_insert_request(struct request_queue *q, struct request *rq)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	cfq_log_cfqq(cfqd, cfqq, "insert_request");
	cfq_init_prio_data(cfqq, RQ_CIC(rq));

	/* sync and async requests use different fifo expiry tunables */
	rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
	list_add_tail(&rq->queuelist, &cfqq->fifo);
	cfq_add_rq_rb(rq);
	cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
				 rq->cmd_flags);
	cfq_rq_enqueued(cfqd, cfqq, rq);
}
4162
/*
 * Update hw_tag based on peak queue depth over 50 samples under
 * sufficient load.
 */
static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
	struct cfq_queue *cfqq = cfqd->active_queue;

	/* track the deepest driver queue depth observed so far */
	if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
		cfqd->hw_tag_est_depth = cfqd->rq_in_driver;

	/* once detected as queueing hardware, never downgrade */
	if (cfqd->hw_tag == 1)
		return;

	/* only sample under sufficient load */
	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
		return;

	/*
	 * If active queue hasn't enough requests and can idle, cfq might not
	 * dispatch sufficient requests to hardware. Don't zero hw_tag in this
	 * case
	 */
	if (cfqq && cfq_cfqq_idle_window(cfqq) &&
	    cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
	    CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
		return;

	if (cfqd->hw_tag_samples++ < 50)
		return;

	/* verdict after 50 qualifying samples */
	if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
		cfqd->hw_tag = 1;
	else
		cfqd->hw_tag = 0;
}
4199
/*
 * Decide whether the active queue should be kept "wait busy": held a bit
 * longer at slice end so the process can issue its next request, instead
 * of the group losing its share just because the queue went briefly empty.
 */
static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
	struct cfq_io_cq *cic = cfqd->active_cic;
	u64 now = ktime_get_ns();

	/* If the queue already has requests, don't wait */
	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
		return false;

	/* If there are other queues in the group, don't wait */
	if (cfqq->cfqg->nr_cfqq > 1)
		return false;

	/* the only queue in the group, but think time is big */
	if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
		return false;

	/* slice already used up: wait for the next request */
	if (cfq_slice_used(cfqq))
		return true;

	/* if slice left is less than think time, wait busy */
	if (cic && sample_valid(cic->ttime.ttime_samples)
	    && (cfqq->slice_end - now < cic->ttime.ttime_mean))
		return true;

	/*
	 * If think times is less than a jiffy than ttime_mean=0 and above
	 * will not be true. It might happen that slice has not expired yet
	 * but will expire soon (4-5 ns) during select_queue(). To cover the
	 * case where think time is less than a jiffy, mark the queue wait
	 * busy if only 1 jiffy is left in the slice.
	 */
	if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
		return true;

	return false;
}
4237
/*
 * Elevator completion hook: undo the in-flight accounting taken at
 * dispatch, refresh think-time/last-completion statistics, and decide
 * whether the active queue should be expired, kept wait-busy, or armed
 * with the idle timer.
 */
static void cfq_completed_request(struct request_queue *q, struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);
	struct cfq_data *cfqd = cfqq->cfqd;
	const int sync = rq_is_sync(rq);
	u64 now = ktime_get_ns();

	cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq));

	cfq_update_hw_tag(cfqd);

	/* drop the dispatch-time accounting for this request */
	WARN_ON(!cfqd->rq_in_driver);
	WARN_ON(!cfqq->dispatched);
	cfqd->rq_in_driver--;
	cfqq->dispatched--;
	(RQ_CFQG(rq))->dispatched--;
	cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
				     rq_io_start_time_ns(rq), rq->cmd_flags);

	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;

	if (sync) {
		struct cfq_rb_root *st;

		RQ_CIC(rq)->ttime.last_end_request = now;

		/* pick the service tree this queue currently belongs to */
		if (cfq_cfqq_on_rr(cfqq))
			st = cfqq->service_tree;
		else
			st = st_for(cfqq->cfqg, cfqq_class(cfqq),
				    cfqq_type(cfqq));

		st->ttime.last_end_request = now;
		/*
		 * We have to do this check in jiffies since start_time is in
		 * jiffies and it is not trivial to convert to ns. If
		 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
		 * will become problematic but so far we are fine (the default
		 * is 128 ms).
		 */
		if (!time_after(rq->start_time +
				nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
				jiffies))
			cfqd->last_delayed_sync = now;
	}

#ifdef CONFIG_CFQ_GROUP_IOSCHED
	cfqq->cfqg->ttime.last_end_request = now;
#endif

	/*
	 * If this is the active queue, check if it needs to be expired,
	 * or if we want to idle in case it has no pending requests.
	 */
	if (cfqd->active_queue == cfqq) {
		const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);

		/* first completion of a fresh slice: size the slice now */
		if (cfq_cfqq_slice_new(cfqq)) {
			cfq_set_prio_slice(cfqd, cfqq);
			cfq_clear_cfqq_slice_new(cfqq);
		}

		/*
		 * Should we wait for next request to come in before we expire
		 * the queue.
		 */
		if (cfq_should_wait_busy(cfqd, cfqq)) {
			u64 extend_sl = cfqd->cfq_slice_idle;
			if (!cfqd->cfq_slice_idle)
				extend_sl = cfqd->cfq_group_idle;
			cfqq->slice_end = now + extend_sl;
			cfq_mark_cfqq_wait_busy(cfqq);
			cfq_log_cfqq(cfqd, cfqq, "will busy wait");
		}

		/*
		 * Idling is not enabled on:
		 * - expired queues
		 * - idle-priority queues
		 * - async queues
		 * - queues with still some requests queued
		 * - when there is a close cooperator
		 */
		if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
			cfq_slice_expired(cfqd, 1);
		else if (sync && cfqq_empty &&
			 !cfq_close_cooperator(cfqd, cfqq)) {
			cfq_arm_slice_timer(cfqd);
		}
	}

	/* nothing left in flight: kick the dispatcher */
	if (!cfqd->rq_in_driver)
		cfq_schedule_dispatch(cfqd);
}
4332
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004333static void cfqq_boost_on_prio(struct cfq_queue *cfqq, unsigned int op)
Jens Axboeb8269db2016-06-09 15:47:29 -06004334{
4335 /*
4336 * If REQ_PRIO is set, boost class and prio level, if it's below
4337 * BE/NORM. If prio is not set, restore the potentially boosted
4338 * class/prio level.
4339 */
Christoph Hellwigef295ec2016-10-28 08:48:16 -06004340 if (!(op & REQ_PRIO)) {
Jens Axboeb8269db2016-06-09 15:47:29 -06004341 cfqq->ioprio_class = cfqq->org_ioprio_class;
4342 cfqq->ioprio = cfqq->org_ioprio;
4343 } else {
4344 if (cfq_class_idle(cfqq))
4345 cfqq->ioprio_class = IOPRIO_CLASS_BE;
4346 if (cfqq->ioprio > IOPRIO_NORM)
4347 cfqq->ioprio = IOPRIO_NORM;
4348 }
4349}
4350
Jens Axboe89850f72006-07-22 16:48:31 +02004351static inline int __cfq_may_queue(struct cfq_queue *cfqq)
Jens Axboe22e2c502005-06-27 10:55:12 +02004352{
Jens Axboe1b379d82009-08-11 08:26:11 +02004353 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
Jens Axboe3b181522005-06-27 10:56:24 +02004354 cfq_mark_cfqq_must_alloc_slice(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004355 return ELV_MQUEUE_MUST;
Jens Axboe3b181522005-06-27 10:56:24 +02004356 }
Jens Axboe22e2c502005-06-27 10:55:12 +02004357
4358 return ELV_MQUEUE_MAY;
Jens Axboe22e2c502005-06-27 10:55:12 +02004359}
4360
/*
 * Elevator may_queue hook: report whether the current task may (or must)
 * be allowed to allocate a request for operation @op.
 */
static int cfq_may_queue(struct request_queue *q, unsigned int op)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct task_struct *tsk = current;
	struct cfq_io_cq *cic;
	struct cfq_queue *cfqq;

	/*
	 * don't force setup of a queue from here, as a call to may_queue
	 * does not necessarily imply that a request actually will be queued.
	 * so just lookup a possibly existing queue, or return 'may queue'
	 * if that fails
	 */
	cic = cfq_cic_lookup(cfqd, tsk->io_context);
	if (!cic)
		return ELV_MQUEUE_MAY;

	cfqq = cic_to_cfqq(cic, op_is_sync(op));
	if (cfqq) {
		cfq_init_prio_data(cfqq, cic);
		/* REQ_PRIO ops may temporarily boost the queue's priority */
		cfqq_boost_on_prio(cfqq, op);

		return __cfq_may_queue(cfqq);
	}

	return ELV_MQUEUE_MAY;
}
4388
/*
 * queue lock held here
 *
 * Release the cfqq and cfqg references that were taken for this request
 * in cfq_set_request(), and undo the per-direction allocation count.
 */
static void cfq_put_request(struct request *rq)
{
	struct cfq_queue *cfqq = RQ_CFQQ(rq);

	if (cfqq) {
		const int rw = rq_data_dir(rq);

		BUG_ON(!cfqq->allocated[rw]);
		cfqq->allocated[rw]--;

		/* Put down rq reference on cfqg */
		cfqg_put(RQ_CFQG(rq));
		/* clear the back-pointers stashed in the request */
		rq->elv.priv[0] = NULL;
		rq->elv.priv[1] = NULL;

		cfq_put_queue(cfqq);
	}
}
4410
/*
 * Move cic's sync-queue association over to cfqq->new_cfqq (the close
 * cooperator this queue was scheduled to merge with), dropping the old
 * queue's reference.  Returns the queue the cic now points at; the
 * reference on new_cfqq was taken when the merge was scheduled.
 */
static struct cfq_queue *
cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
		struct cfq_queue *cfqq)
{
	cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
	cic_set_cfqq(cic, cfqq->new_cfqq, 1);
	cfq_mark_cfqq_coop(cfqq->new_cfqq);
	cfq_put_queue(cfqq);
	return cic_to_cfqq(cic, 1);
}
4421
/*
 * Returns NULL if a new cfqq should be allocated, or the old cfqq if this
 * was the last process referring to said cfqq.
 */
static struct cfq_queue *
split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
{
	if (cfqq_process_refs(cfqq) == 1) {
		/* sole user left: reclaim the queue for this process */
		cfqq->pid = current->pid;
		cfq_clear_cfqq_coop(cfqq);
		cfq_clear_cfqq_split_coop(cfqq);
		return cfqq;
	}

	/* detach the cic; caller will allocate a fresh queue */
	cic_set_cfqq(cic, NULL, 1);

	cfq_put_cooperator(cfqq);

	cfq_put_queue(cfqq);
	return NULL;
}
/*
 * Allocate cfq data structures associated with this request.
 *
 * Finds (or creates) the cfqq for the issuing context, handling the
 * split of over-seeky cooperating queues and pending queue merges, then
 * stashes referenced cfqq/cfqg pointers in the request for the dispatch
 * and completion paths.  Always succeeds (falls back to oom_cfqq).
 */
static int
cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
		gfp_t gfp_mask)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;
	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
	const int rw = rq_data_dir(rq);
	const bool is_sync = rq_is_sync(rq);
	struct cfq_queue *cfqq;

	spin_lock_irq(q->queue_lock);

	/* pick up any ioprio/cgroup changes since the last request */
	check_ioprio_changed(cic, bio);
	check_blkcg_changed(cic, bio);
new_queue:
	cfqq = cic_to_cfqq(cic, is_sync);
	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
		if (cfqq)
			cfq_put_queue(cfqq);
		cfqq = cfq_get_queue(cfqd, is_sync, cic, bio);
		cic_set_cfqq(cic, cfqq, is_sync);
	} else {
		/*
		 * If the queue was seeky for too long, break it apart.
		 */
		if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
			cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
			cfqq = split_cfqq(cic, cfqq);
			if (!cfqq)
				goto new_queue;
		}

		/*
		 * Check to see if this queue is scheduled to merge with
		 * another, closely cooperating queue. The merging of
		 * queues happens here as it must be done in process context.
		 * The reference on new_cfqq was taken in merge_cfqqs.
		 */
		if (cfqq->new_cfqq)
			cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
	}

	cfqq->allocated[rw]++;

	/* take cfqq and cfqg references; dropped in cfq_put_request() */
	cfqq->ref++;
	cfqg_get(cfqq->cfqg);
	rq->elv.priv[0] = cfqq;
	rq->elv.priv[1] = cfqq->cfqg;
	spin_unlock_irq(q->queue_lock);
	return 0;
}
4497
David Howells65f27f32006-11-22 14:55:48 +00004498static void cfq_kick_queue(struct work_struct *work)
Jens Axboe22e2c502005-06-27 10:55:12 +02004499{
David Howells65f27f32006-11-22 14:55:48 +00004500 struct cfq_data *cfqd =
Jens Axboe23e018a2009-10-05 08:52:35 +02004501 container_of(work, struct cfq_data, unplug_work);
Jens Axboe165125e2007-07-24 09:28:11 +02004502 struct request_queue *q = cfqd->queue;
Jens Axboe22e2c502005-06-27 10:55:12 +02004503
Jens Axboe40bb54d2009-04-15 12:11:10 +02004504 spin_lock_irq(q->queue_lock);
Christoph Hellwig24ecfbe2011-04-18 11:41:33 +02004505 __blk_run_queue(cfqd->queue);
Jens Axboe40bb54d2009-04-15 12:11:10 +02004506 spin_unlock_irq(q->queue_lock);
Jens Axboe22e2c502005-06-27 10:55:12 +02004507}
4508
4509/*
4510 * Timer running if the active_queue is currently idling inside its time slice
4511 */
Jan Kara91148322016-06-08 15:11:39 +02004512static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
Jens Axboe22e2c502005-06-27 10:55:12 +02004513{
Jan Kara91148322016-06-08 15:11:39 +02004514 struct cfq_data *cfqd = container_of(timer, struct cfq_data,
4515 idle_slice_timer);
Jens Axboe22e2c502005-06-27 10:55:12 +02004516 struct cfq_queue *cfqq;
4517 unsigned long flags;
Jens Axboe3c6bd2f2007-01-19 12:06:33 +11004518 int timed_out = 1;
Jens Axboe22e2c502005-06-27 10:55:12 +02004519
Jens Axboe7b679132008-05-30 12:23:07 +02004520 cfq_log(cfqd, "idle timer fired");
4521
Jens Axboe22e2c502005-06-27 10:55:12 +02004522 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
4523
Jens Axboefe094d92008-01-31 13:08:54 +01004524 cfqq = cfqd->active_queue;
4525 if (cfqq) {
Jens Axboe3c6bd2f2007-01-19 12:06:33 +11004526 timed_out = 0;
4527
Jens Axboe22e2c502005-06-27 10:55:12 +02004528 /*
Jens Axboeb0291952009-04-07 11:38:31 +02004529 * We saw a request before the queue expired, let it through
4530 */
4531 if (cfq_cfqq_must_dispatch(cfqq))
4532 goto out_kick;
4533
4534 /*
Jens Axboe22e2c502005-06-27 10:55:12 +02004535 * expired
4536 */
Jens Axboe44f7c162007-01-19 11:51:58 +11004537 if (cfq_slice_used(cfqq))
Jens Axboe22e2c502005-06-27 10:55:12 +02004538 goto expire;
4539
4540 /*
4541 * only expire and reinvoke request handler, if there are
4542 * other queues with pending requests
4543 */
Jens Axboecaaa5f92006-06-16 11:23:00 +02004544 if (!cfqd->busy_queues)
Jens Axboe22e2c502005-06-27 10:55:12 +02004545 goto out_cont;
Jens Axboe22e2c502005-06-27 10:55:12 +02004546
4547 /*
4548 * not expired and it has a request pending, let it dispatch
4549 */
Jens Axboe75e50982009-04-07 08:56:14 +02004550 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
Jens Axboe22e2c502005-06-27 10:55:12 +02004551 goto out_kick;
Corrado Zoccolo76280af2009-11-26 10:02:58 +01004552
4553 /*
4554 * Queue depth flag is reset only when the idle didn't succeed
4555 */
4556 cfq_clear_cfqq_deep(cfqq);
Jens Axboe22e2c502005-06-27 10:55:12 +02004557 }
4558expire:
Vivek Goyale5ff0822010-04-26 19:25:11 +02004559 cfq_slice_expired(cfqd, timed_out);
Jens Axboe22e2c502005-06-27 10:55:12 +02004560out_kick:
Jens Axboe23e018a2009-10-05 08:52:35 +02004561 cfq_schedule_dispatch(cfqd);
Jens Axboe22e2c502005-06-27 10:55:12 +02004562out_cont:
4563 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
Jan Kara91148322016-06-08 15:11:39 +02004564 return HRTIMER_NORESTART;
Jens Axboe22e2c502005-06-27 10:55:12 +02004565}
4566
Jens Axboe3b181522005-06-27 10:56:24 +02004567static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
4568{
Jan Kara91148322016-06-08 15:11:39 +02004569 hrtimer_cancel(&cfqd->idle_slice_timer);
Jens Axboe23e018a2009-10-05 08:52:35 +02004570 cancel_work_sync(&cfqd->unplug_work);
Jens Axboe3b181522005-06-27 10:56:24 +02004571}
Jens Axboe22e2c502005-06-27 10:55:12 +02004572
Jens Axboeb374d182008-10-31 10:05:07 +01004573static void cfq_exit_queue(struct elevator_queue *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004574{
Jens Axboe22e2c502005-06-27 10:55:12 +02004575 struct cfq_data *cfqd = e->elevator_data;
Jens Axboe165125e2007-07-24 09:28:11 +02004576 struct request_queue *q = cfqd->queue;
Jens Axboe22e2c502005-06-27 10:55:12 +02004577
Jens Axboe3b181522005-06-27 10:56:24 +02004578 cfq_shutdown_timer_wq(cfqd);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004579
Al Virod9ff4182006-03-18 13:51:22 -05004580 spin_lock_irq(q->queue_lock);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004581
Al Virod9ff4182006-03-18 13:51:22 -05004582 if (cfqd->active_queue)
Vivek Goyale5ff0822010-04-26 19:25:11 +02004583 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
Jens Axboee2d74ac2006-03-28 08:59:01 +02004584
Tejun Heo03aa2642012-03-05 13:15:19 -08004585 spin_unlock_irq(q->queue_lock);
4586
Al Viroa90d7422006-03-18 12:05:37 -05004587 cfq_shutdown_timer_wq(cfqd);
4588
Tejun Heoffea73f2012-06-04 10:02:29 +02004589#ifdef CONFIG_CFQ_GROUP_IOSCHED
4590 blkcg_deactivate_policy(q, &blkcg_policy_cfq);
4591#else
Tejun Heof51b8022012-03-05 13:15:05 -08004592 kfree(cfqd->root_group);
Vivek Goyal2abae552011-05-23 10:02:19 +02004593#endif
Vivek Goyal56edf7d2011-05-19 15:38:22 -04004594 kfree(cfqd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004595}
4596
Jianpeng Mad50235b2013-07-03 13:25:24 +02004597static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004598{
4599 struct cfq_data *cfqd;
Tejun Heo3c798392012-04-16 13:57:25 -07004600 struct blkcg_gq *blkg __maybe_unused;
Tejun Heoa2b16932012-04-13 13:11:33 -07004601 int i, ret;
Jianpeng Mad50235b2013-07-03 13:25:24 +02004602 struct elevator_queue *eq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004603
Jianpeng Mad50235b2013-07-03 13:25:24 +02004604 eq = elevator_alloc(q, e);
4605 if (!eq)
Tejun Heob2fab5a2012-03-05 13:14:57 -08004606 return -ENOMEM;
Konstantin Khlebnikov80b15c72010-05-20 23:21:41 +04004607
Joe Perchesc1b511e2013-08-29 15:21:42 -07004608 cfqd = kzalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
Jianpeng Mad50235b2013-07-03 13:25:24 +02004609 if (!cfqd) {
4610 kobject_put(&eq->kobj);
4611 return -ENOMEM;
4612 }
4613 eq->elevator_data = cfqd;
4614
Tejun Heof51b8022012-03-05 13:15:05 -08004615 cfqd->queue = q;
Jianpeng Mad50235b2013-07-03 13:25:24 +02004616 spin_lock_irq(q->queue_lock);
4617 q->elevator = eq;
4618 spin_unlock_irq(q->queue_lock);
Tejun Heof51b8022012-03-05 13:15:05 -08004619
Vivek Goyal1fa8f6d2009-12-03 12:59:41 -05004620 /* Init root service tree */
4621 cfqd->grp_service_tree = CFQ_RB_ROOT;
4622
Tejun Heof51b8022012-03-05 13:15:05 -08004623 /* Init root group and prefer root group over other groups by default */
Vivek Goyal25fb5162009-12-03 12:59:46 -05004624#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004625 ret = blkcg_activate_policy(q, &blkcg_policy_cfq);
Tejun Heoa2b16932012-04-13 13:11:33 -07004626 if (ret)
4627 goto out_free;
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004628
Tejun Heoa2b16932012-04-13 13:11:33 -07004629 cfqd->root_group = blkg_to_cfqg(q->root_blkg);
Tejun Heof51b8022012-03-05 13:15:05 -08004630#else
Tejun Heoa2b16932012-04-13 13:11:33 -07004631 ret = -ENOMEM;
Tejun Heof51b8022012-03-05 13:15:05 -08004632 cfqd->root_group = kzalloc_node(sizeof(*cfqd->root_group),
4633 GFP_KERNEL, cfqd->queue->node);
Tejun Heoa2b16932012-04-13 13:11:33 -07004634 if (!cfqd->root_group)
4635 goto out_free;
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004636
Tejun Heoa2b16932012-04-13 13:11:33 -07004637 cfq_init_cfqg_base(cfqd->root_group);
Tejun Heo3ecca622015-08-18 14:55:35 -07004638 cfqd->root_group->weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
4639 cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_LEGACY_DFL;
Tejun Heo69d7fde2015-08-18 14:55:36 -07004640#endif
Vivek Goyal5624a4e2011-05-19 15:38:28 -04004641
Jens Axboe26a2ac02009-04-23 12:13:27 +02004642 /*
4643 * Not strictly needed (since RB_ROOT just clears the node and we
4644 * zeroed cfqd on alloc), but better be safe in case someone decides
4645 * to add magic to the rb code
4646 */
4647 for (i = 0; i < CFQ_PRIO_LISTS; i++)
4648 cfqd->prio_trees[i] = RB_ROOT;
4649
Jens Axboe6118b702009-06-30 09:34:12 +02004650 /*
Tejun Heod4aad7f2015-08-18 14:55:04 -07004651 * Our fallback cfqq if cfq_get_queue() runs into OOM issues.
Jens Axboe6118b702009-06-30 09:34:12 +02004652 * Grab a permanent reference to it, so that the normal code flow
Tejun Heof51b8022012-03-05 13:15:05 -08004653 * will not attempt to free it. oom_cfqq is linked to root_group
4654 * but shouldn't hold a reference as it'll never be unlinked. Lose
4655 * the reference from linking right away.
Jens Axboe6118b702009-06-30 09:34:12 +02004656 */
4657 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
Shaohua Li30d7b942011-01-07 08:46:59 +01004658 cfqd->oom_cfqq.ref++;
Tejun Heo1adaf3d2012-03-05 13:15:15 -08004659
4660 spin_lock_irq(q->queue_lock);
Tejun Heof51b8022012-03-05 13:15:05 -08004661 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
Tejun Heoeb7d8c072012-03-23 14:02:53 +01004662 cfqg_put(cfqd->root_group);
Tejun Heo1adaf3d2012-03-05 13:15:15 -08004663 spin_unlock_irq(q->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004664
Jan Kara91148322016-06-08 15:11:39 +02004665 hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
4666 HRTIMER_MODE_REL);
Jens Axboe22e2c502005-06-27 10:55:12 +02004667 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
Jens Axboe22e2c502005-06-27 10:55:12 +02004668
Jens Axboe23e018a2009-10-05 08:52:35 +02004669 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
Jens Axboe22e2c502005-06-27 10:55:12 +02004670
Linus Torvalds1da177e2005-04-16 15:20:36 -07004671 cfqd->cfq_quantum = cfq_quantum;
Jens Axboe22e2c502005-06-27 10:55:12 +02004672 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
4673 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07004674 cfqd->cfq_back_max = cfq_back_max;
4675 cfqd->cfq_back_penalty = cfq_back_penalty;
Jens Axboe22e2c502005-06-27 10:55:12 +02004676 cfqd->cfq_slice[0] = cfq_slice_async;
4677 cfqd->cfq_slice[1] = cfq_slice_sync;
Tao Ma5bf14c02012-04-01 14:33:39 -07004678 cfqd->cfq_target_latency = cfq_target_latency;
Jens Axboe22e2c502005-06-27 10:55:12 +02004679 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
Jens Axboe0bb97942015-06-10 08:01:20 -06004680 cfqd->cfq_slice_idle = cfq_slice_idle;
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004681 cfqd->cfq_group_idle = cfq_group_idle;
Jens Axboe963b72f2009-10-03 19:42:18 +02004682 cfqd->cfq_latency = 1;
Corrado Zoccoloe459dd02009-11-26 10:02:57 +01004683 cfqd->hw_tag = -1;
Corrado Zoccoloedc71132009-12-09 20:56:04 +01004684 /*
4685 * we optimistically start assuming sync ops weren't delayed in last
4686 * second, in order to have larger depth for async operations.
4687 */
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004688 cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
Tejun Heob2fab5a2012-03-05 13:14:57 -08004689 return 0;
Tejun Heoa2b16932012-04-13 13:11:33 -07004690
4691out_free:
4692 kfree(cfqd);
Jianpeng Mad50235b2013-07-03 13:25:24 +02004693 kobject_put(&eq->kobj);
Tejun Heoa2b16932012-04-13 13:11:33 -07004694 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004695}
4696
Jens Axboe0bb97942015-06-10 08:01:20 -06004697static void cfq_registered_queue(struct request_queue *q)
4698{
4699 struct elevator_queue *e = q->elevator;
4700 struct cfq_data *cfqd = e->elevator_data;
4701
4702 /*
4703 * Default to IOPS mode with no idling for SSDs
4704 */
4705 if (blk_queue_nonrot(q))
4706 cfqd->cfq_slice_idle = 0;
4707}
4708
Linus Torvalds1da177e2005-04-16 15:20:36 -07004709/*
4710 * sysfs parts below -->
4711 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004712static ssize_t
4713cfq_var_show(unsigned int var, char *page)
4714{
Masanari Iida176167a2014-04-28 12:38:34 +09004715 return sprintf(page, "%u\n", var);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004716}
4717
/*
 * Parse a decimal unsigned value from a sysfs write into *var.
 * Always consumes the whole write: returns count unconditionally
 * (trailing garbage after the number is silently ignored).
 */
static ssize_t
cfq_var_store(unsigned int *var, const char *page, size_t count)
{
	char *cur = (char *) page;

	*var = simple_strtoul(cur, &cur, 10);
	return count;
}
4726
Linus Torvalds1da177e2005-04-16 15:20:36 -07004727#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
Jens Axboeb374d182008-10-31 10:05:07 +01004728static ssize_t __FUNC(struct elevator_queue *e, char *page) \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004729{ \
Al Viro3d1ab402006-03-18 18:35:43 -05004730 struct cfq_data *cfqd = e->elevator_data; \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004731 u64 __data = __VAR; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004732 if (__CONV) \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004733 __data = div_u64(__data, NSEC_PER_MSEC); \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004734 return cfq_var_show(__data, (page)); \
4735}
4736SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004737SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
4738SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
Al Viroe572ec72006-03-18 22:27:18 -05004739SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
4740SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004741SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004742SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1);
Jens Axboe22e2c502005-06-27 10:55:12 +02004743SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
4744SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
4745SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
Jens Axboe963b72f2009-10-03 19:42:18 +02004746SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
Tao Ma5bf14c02012-04-01 14:33:39 -07004747SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004748#undef SHOW_FUNCTION
4749
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004750#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
4751static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4752{ \
4753 struct cfq_data *cfqd = e->elevator_data; \
4754 u64 __data = __VAR; \
4755 __data = div_u64(__data, NSEC_PER_USEC); \
4756 return cfq_var_show(__data, (page)); \
4757}
4758USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
4759USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
4760USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
4761USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
4762USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
4763#undef USEC_SHOW_FUNCTION
4764
Linus Torvalds1da177e2005-04-16 15:20:36 -07004765#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
Jens Axboeb374d182008-10-31 10:05:07 +01004766static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004767{ \
Al Viro3d1ab402006-03-18 18:35:43 -05004768 struct cfq_data *cfqd = e->elevator_data; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004769 unsigned int __data; \
4770 int ret = cfq_var_store(&__data, (page), count); \
4771 if (__data < (MIN)) \
4772 __data = (MIN); \
4773 else if (__data > (MAX)) \
4774 __data = (MAX); \
4775 if (__CONV) \
Jeff Moyer9a7f38c2016-06-08 08:55:34 -06004776 *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
Linus Torvalds1da177e2005-04-16 15:20:36 -07004777 else \
4778 *(__PTR) = __data; \
4779 return ret; \
4780}
4781STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
Jens Axboefe094d92008-01-31 13:08:54 +01004782STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
4783 UINT_MAX, 1);
4784STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
4785 UINT_MAX, 1);
Al Viroe572ec72006-03-18 22:27:18 -05004786STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
Jens Axboefe094d92008-01-31 13:08:54 +01004787STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
4788 UINT_MAX, 0);
Jens Axboe22e2c502005-06-27 10:55:12 +02004789STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004790STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1);
Jens Axboe22e2c502005-06-27 10:55:12 +02004791STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
4792STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
Jens Axboefe094d92008-01-31 13:08:54 +01004793STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
4794 UINT_MAX, 0);
Jens Axboe963b72f2009-10-03 19:42:18 +02004795STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
Tao Ma5bf14c02012-04-01 14:33:39 -07004796STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004797#undef STORE_FUNCTION
4798
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004799#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
4800static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4801{ \
4802 struct cfq_data *cfqd = e->elevator_data; \
4803 unsigned int __data; \
4804 int ret = cfq_var_store(&__data, (page), count); \
4805 if (__data < (MIN)) \
4806 __data = (MIN); \
4807 else if (__data > (MAX)) \
4808 __data = (MAX); \
4809 *(__PTR) = (u64)__data * NSEC_PER_USEC; \
4810 return ret; \
4811}
4812USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
4813USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
4814USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
4815USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
4816USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
4817#undef USEC_STORE_FUNCTION
4818
Al Viroe572ec72006-03-18 22:27:18 -05004819#define CFQ_ATTR(name) \
4820 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
Jens Axboe3b181522005-06-27 10:56:24 +02004821
Al Viroe572ec72006-03-18 22:27:18 -05004822static struct elv_fs_entry cfq_attrs[] = {
4823 CFQ_ATTR(quantum),
Al Viroe572ec72006-03-18 22:27:18 -05004824 CFQ_ATTR(fifo_expire_sync),
4825 CFQ_ATTR(fifo_expire_async),
4826 CFQ_ATTR(back_seek_max),
4827 CFQ_ATTR(back_seek_penalty),
4828 CFQ_ATTR(slice_sync),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004829 CFQ_ATTR(slice_sync_us),
Al Viroe572ec72006-03-18 22:27:18 -05004830 CFQ_ATTR(slice_async),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004831 CFQ_ATTR(slice_async_us),
Al Viroe572ec72006-03-18 22:27:18 -05004832 CFQ_ATTR(slice_async_rq),
4833 CFQ_ATTR(slice_idle),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004834 CFQ_ATTR(slice_idle_us),
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004835 CFQ_ATTR(group_idle),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004836 CFQ_ATTR(group_idle_us),
Jens Axboe963b72f2009-10-03 19:42:18 +02004837 CFQ_ATTR(low_latency),
Tao Ma5bf14c02012-04-01 14:33:39 -07004838 CFQ_ATTR(target_latency),
Jeff Moyerd2d481d2016-06-08 15:11:38 +02004839 CFQ_ATTR(target_latency_us),
Al Viroe572ec72006-03-18 22:27:18 -05004840 __ATTR_NULL
Linus Torvalds1da177e2005-04-16 15:20:36 -07004841};
4842
Linus Torvalds1da177e2005-04-16 15:20:36 -07004843static struct elevator_type iosched_cfq = {
Jens Axboec51ca6c2016-12-10 15:13:59 -07004844 .ops.sq = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004845 .elevator_merge_fn = cfq_merge,
4846 .elevator_merged_fn = cfq_merged_request,
4847 .elevator_merge_req_fn = cfq_merged_requests,
Tahsin Erdogan72ef7992016-07-07 11:48:22 -07004848 .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
4849 .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
Divyesh Shah812d4022010-04-08 21:14:23 -07004850 .elevator_bio_merged_fn = cfq_bio_merged,
Jens Axboeb4878f22005-10-20 16:42:29 +02004851 .elevator_dispatch_fn = cfq_dispatch_requests,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004852 .elevator_add_req_fn = cfq_insert_request,
Jens Axboeb4878f22005-10-20 16:42:29 +02004853 .elevator_activate_req_fn = cfq_activate_request,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004854 .elevator_deactivate_req_fn = cfq_deactivate_request,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004855 .elevator_completed_req_fn = cfq_completed_request,
Jens Axboe21183b02006-07-13 12:33:14 +02004856 .elevator_former_req_fn = elv_rb_former_request,
4857 .elevator_latter_req_fn = elv_rb_latter_request,
Tejun Heo9b84cac2011-12-14 00:33:42 +01004858 .elevator_init_icq_fn = cfq_init_icq,
Tejun Heo7e5a8792011-12-14 00:33:42 +01004859 .elevator_exit_icq_fn = cfq_exit_icq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004860 .elevator_set_req_fn = cfq_set_request,
4861 .elevator_put_req_fn = cfq_put_request,
4862 .elevator_may_queue_fn = cfq_may_queue,
4863 .elevator_init_fn = cfq_init_queue,
4864 .elevator_exit_fn = cfq_exit_queue,
Jens Axboe0bb97942015-06-10 08:01:20 -06004865 .elevator_registered_fn = cfq_registered_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004866 },
Tejun Heo3d3c2372011-12-14 00:33:42 +01004867 .icq_size = sizeof(struct cfq_io_cq),
4868 .icq_align = __alignof__(struct cfq_io_cq),
Al Viro3d1ab402006-03-18 18:35:43 -05004869 .elevator_attrs = cfq_attrs,
Tejun Heo3d3c2372011-12-14 00:33:42 +01004870 .elevator_name = "cfq",
Linus Torvalds1da177e2005-04-16 15:20:36 -07004871 .elevator_owner = THIS_MODULE,
4872};
4873
Vivek Goyal3e252062009-12-04 10:36:42 -05004874#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004875static struct blkcg_policy blkcg_policy_cfq = {
Tejun Heo2ee867dc2015-08-18 14:55:34 -07004876 .dfl_cftypes = cfq_blkcg_files,
Tejun Heo880f50e2015-08-18 14:55:30 -07004877 .legacy_cftypes = cfq_blkcg_legacy_files,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004878
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004879 .cpd_alloc_fn = cfq_cpd_alloc,
Arianna Avanzinie48453c2015-06-05 23:38:42 +02004880 .cpd_init_fn = cfq_cpd_init,
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004881 .cpd_free_fn = cfq_cpd_free,
Tejun Heo69d7fde2015-08-18 14:55:36 -07004882 .cpd_bind_fn = cfq_cpd_bind,
Tejun Heoe4a9bde2015-08-18 14:55:16 -07004883
Tejun Heo001bea72015-08-18 14:55:11 -07004884 .pd_alloc_fn = cfq_pd_alloc,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004885 .pd_init_fn = cfq_pd_init,
Tejun Heo0b399202013-01-09 08:05:13 -08004886 .pd_offline_fn = cfq_pd_offline,
Tejun Heo001bea72015-08-18 14:55:11 -07004887 .pd_free_fn = cfq_pd_free,
Tejun Heof9fcc2d2012-04-16 13:57:27 -07004888 .pd_reset_stats_fn = cfq_pd_reset_stats,
Vivek Goyal3e252062009-12-04 10:36:42 -05004889};
Vivek Goyal3e252062009-12-04 10:36:42 -05004890#endif
4891
Linus Torvalds1da177e2005-04-16 15:20:36 -07004892static int __init cfq_init(void)
4893{
Tejun Heo3d3c2372011-12-14 00:33:42 +01004894 int ret;
4895
Vivek Goyal80bdf0c2010-08-23 12:24:26 +02004896#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004897 ret = blkcg_policy_register(&blkcg_policy_cfq);
Tejun Heo8bd435b2012-04-13 13:11:28 -07004898 if (ret)
4899 return ret;
Tejun Heoffea73f2012-06-04 10:02:29 +02004900#else
4901 cfq_group_idle = 0;
4902#endif
Tejun Heo8bd435b2012-04-13 13:11:28 -07004903
Tejun Heofd794952012-06-04 10:01:38 +02004904 ret = -ENOMEM;
Tejun Heo3d3c2372011-12-14 00:33:42 +01004905 cfq_pool = KMEM_CACHE(cfq_queue, 0);
4906 if (!cfq_pool)
Tejun Heo8bd435b2012-04-13 13:11:28 -07004907 goto err_pol_unreg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004908
Tejun Heo3d3c2372011-12-14 00:33:42 +01004909 ret = elv_register(&iosched_cfq);
Tejun Heo8bd435b2012-04-13 13:11:28 -07004910 if (ret)
4911 goto err_free_pool;
Tejun Heo3d3c2372011-12-14 00:33:42 +01004912
Adrian Bunk2fdd82b2007-12-12 18:51:56 +01004913 return 0;
Tejun Heo8bd435b2012-04-13 13:11:28 -07004914
4915err_free_pool:
4916 kmem_cache_destroy(cfq_pool);
4917err_pol_unreg:
Tejun Heoffea73f2012-06-04 10:02:29 +02004918#ifdef CONFIG_CFQ_GROUP_IOSCHED
Tejun Heo3c798392012-04-16 13:57:25 -07004919 blkcg_policy_unregister(&blkcg_policy_cfq);
Tejun Heoffea73f2012-06-04 10:02:29 +02004920#endif
Tejun Heo8bd435b2012-04-13 13:11:28 -07004921 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004922}
4923
/*
 * Module exit: undo cfq_init() in reverse order — unregister the blkcg
 * policy and the elevator, then destroy the cfq_queue slab cache.
 */
static void __exit cfq_exit(void)
{
#ifdef CONFIG_CFQ_GROUP_IOSCHED
	blkcg_policy_unregister(&blkcg_policy_cfq);
#endif
	elv_unregister(&iosched_cfq);
	kmem_cache_destroy(cfq_pool);
}
4932
4933module_init(cfq_init);
4934module_exit(cfq_exit);
4935
4936MODULE_AUTHOR("Jens Axboe");
4937MODULE_LICENSE("GPL");
4938MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");