/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
						  struct cgroup *);
static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
			      struct cgroup_taskset *);
static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
			   struct cgroup_taskset *);
static int blkiocg_pre_destroy(struct cgroup_subsys *, struct cgroup *);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);

/* for encoding cft->private value on file */
#define BLKIOFILE_PRIVATE(x, val)	(((x) << 16) | (val))
/* What policy owns the file, proportional or throttle */
#define BLKIOFILE_POLICY(val)		(((val) >> 16) & 0xffff)
#define BLKIOFILE_ATTR(val)		((val) & 0xffff)

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
	.subsys_id = blkio_subsys_id,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

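/* return the blkio_cgroup that @tsk is currently associated with */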
static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

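/*
 * Propagate a weight change for @blkg to the policy identified by @plid.
 * The bps and iops variants below do the same for the throttling limits.
 */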
static inline void blkio_update_group_weight(struct blkio_group *blkg,
					     int plid, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->q,
							blkg, weight);
	}
}

static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid,
					  u64 bps, int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (fileid == BLKIO_THROTL_read_bps_device
		    && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->q,
								blkg, bps);

		if (fileid == BLKIO_THROTL_write_bps_device
		    && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->q,
								blkg, bps);
	}
}

static inline void blkio_update_group_iops(struct blkio_group *blkg,
					   int plid, unsigned int iops,
					   int fileid)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {

		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (fileid == BLKIO_THROTL_read_iops_device
		    && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->q,
								blkg, iops);

		if (fileid == BLKIO_THROTL_write_iops_device
		    && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->q,
								blkg, iops);
	}
}

/*
 * Add to the appropriate stat variable depending on the request type.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
			   bool sync)
{
	if (direction)
		stat[BLKIO_STAT_WRITE] += add;
	else
		stat[BLKIO_STAT_READ] += add;
	if (sync)
		stat[BLKIO_STAT_SYNC] += add;
	else
		stat[BLKIO_STAT_ASYNC] += add;
}

/*
 * Decrements the appropriate stat variable if non-zero depending on the
 * request type. Panics on value being zero.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
{
	if (direction) {
		BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
		stat[BLKIO_STAT_WRITE]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_READ] == 0);
		stat[BLKIO_STAT_READ]--;
	}
	if (sync) {
		BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
		stat[BLKIO_STAT_SYNC]--;
	} else {
		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
		stat[BLKIO_STAT_ASYNC]--;
	}
}

#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the blkg->stats_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					    struct blkio_policy_type *pol,
					    struct blkio_group *curr_blkg)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	if (blkio_blkg_waiting(&pd->stats))
		return;
	if (blkg == curr_blkg)
		return;
	pd->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&pd->stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}

/* This should be called with the blkg->stats_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		stats->empty_time += now - stats->start_empty_time;
	blkio_clear_blkg_empty(stats);
}

void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	BUG_ON(blkio_blkg_idling(&pd->stats));
	pd->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&pd->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);

void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);

void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	stats->avg_queue_size_sum +=
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);

void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;

	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	/*
	 * group is already marked empty. This can happen if cfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats)) {
		spin_unlock_irqrestore(&blkg->stats_lock, flags);
		return;
	}

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);

void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	pd->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_policy_type *pol,
					struct blkio_group *curr_blkg) { }
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { }
#endif

void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
			sync);
	blkio_end_empty_time(&pd->stats);
	blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);

void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(pd->stats.stat_arr[BLKIO_STAT_QUEUED],
					direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);

void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	pd->stats.time += time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	pd->stats.unaccounted_time += unaccounted_time;
#endif
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);

/*
 * should be called under rcu read lock or queue lock to make sure blkg pointer
 * is valid.
 */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(pd->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	stats_cpu->sectors += bytes >> 9;
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
			1, direction, sync);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
			bytes, direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);

void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &pd->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
				now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
				io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

/* Merged stats are per cpu. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(pd->stats_cpu);

	u64_stats_update_begin(&stats_cpu->syncp);
	blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
				direction, sync);
	u64_stats_update_end(&stats_cpu->syncp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkg_policy_data *pd = blkg->pd[i];

		if (pd) {
			free_percpu(pd->stats_cpu);
			kfree(pd);
		}
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 *
 * FIXME: Should be called with queue locked but currently isn't due to
 * percpu stat breakage.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	spin_lock_init(&blkg->stats_lock);
	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;

		/* broken, read comment in the callsite */
		pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
		if (!pd->stats_cpu) {
			blkg_free(blkg);
			return NULL;
		}
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}

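/*
 * Look up the blkg for the @blkcg - @q pair, creating and linking a new one
 * if it doesn't exist yet.  Must be called with the queue lock held and
 * under rcu_read_lock(); both are dropped and reacquired around the
 * allocation.  Returns the blkg or an ERR_PTR() on failure.
 */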
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       enum blkio_policy_id plid,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg, *new_blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 *
	 * FIXME: The following is broken. Percpu memory allocation
	 * requires %GFP_KERNEL context and can't be performed from IO
	 * path. Allocation here should inherently be atomic and the
	 * following lock dancing can be removed once the broken percpu
	 * allocation is fixed.
	 */
	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();

	new_blkg = blkg_alloc(blkcg, q);

	rcu_read_lock();
	spin_lock_irq(q->queue_lock);

	/* did bypass get turned on in between? */
	if (unlikely(blk_queue_bypass(q)) && !for_root) {
		blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
		goto out;
	}

	/* did someone beat us to it? */
	blkg = blkg_lookup(blkcg, q);
	if (unlikely(blkg))
		goto out;

	/* did alloc fail? */
	if (unlikely(!new_blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	swap(blkg, new_blkg);

	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);

	spin_unlock(&blkcg->lock);
out:
	blkg_free(new_blkg);
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

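/*
 * Unlink @blkg from its request_queue and blkcg and drop the reference
 * acquired at creation time.  Both q->queue_lock and blkcg->lock must be
 * held.
 */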
static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
	WARN_ON_ONCE(!pd->stats_cpu);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in rcu manner. But having an rcu lock does not
	 * mean that one can access all the fields of blkg and assume these
	 * are valid. For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under an rcu allows access to only
	 * values local to groups like group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);

static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	struct blkio_group_stats_cpu *stats_cpu;
	int i, j, k;
	/*
	 * Note: On 64 bit arch this should not be an issue. This has the
	 * possibility of returning some inconsistent value on 32bit arch
	 * as 64bit update on 32bit is non atomic. Taking care of this
	 * corner case makes code very complicated, like sending IPIs to
	 * cpus, taking care of stats of offline cpus etc.
	 *
	 * reset stats is anyway more of a debug feature and this sounds a
	 * corner case. So I am not complicating the code yet until and
	 * unless this becomes a real issue.
	 */
	for_each_possible_cpu(i) {
		stats_cpu = per_cpu_ptr(pd->stats_cpu, i);
		stats_cpu->sectors = 0;
		for (j = 0; j < BLKIO_STAT_CPU_NR; j++)
			for (k = 0; k < BLKIO_STAT_TOTAL; k++)
				stats_cpu->stat_arr_cpu[j][k] = 0;
	}
}

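/*
 * cgroup write handler that clears the stats of every blkg in the cgroup
 * for every registered policy, preserving only the currently queued
 * request counts and the in-progress idling/waiting/empty markers.
 */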
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list) {
			struct blkg_policy_data *pd = blkg->pd[pol->plid];

			spin_lock(&blkg->stats_lock);
			stats = &pd->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
			idling = blkio_blkg_idling(stats);
			waiting = blkio_blkg_waiting(stats);
			empty = blkio_blkg_empty(stats);
#endif
			for (i = 0; i < BLKIO_STAT_TOTAL; i++)
				queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
			memset(stats, 0, sizeof(struct blkio_group_stats));
			for (i = 0; i < BLKIO_STAT_TOTAL; i++)
				stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
			if (idling) {
				blkio_mark_blkg_idling(stats);
				stats->start_idle_time = now;
			}
			if (waiting) {
				blkio_mark_blkg_waiting(stats);
				stats->start_group_wait_time = now;
			}
			if (empty) {
				blkio_mark_blkg_empty(stats);
				stats->start_empty_time = now;
			}
#endif
			spin_unlock(&blkg->stats_lock);

			/* Reset Per cpu stats which don't take blkg->stats_lock */
			blkio_reset_stats_cpu(blkg, pol->plid);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}

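/* build a "<device> <Read|Write|Sync|Async|Total>" key for stat output */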
static void blkio_get_key_name(enum stat_sub_type type, const char *dname,
			       char *str, int chars_left, bool diskname_only)
{
	snprintf(str, chars_left, "%s", dname);
	chars_left -= strlen(str);
	if (chars_left <= 0) {
		printk(KERN_WARNING
			"Possibly incorrect cgroup stat display format");
		return;
	}
	if (diskname_only)
		return;
	switch (type) {
	case BLKIO_STAT_READ:
		strlcat(str, " Read", chars_left);
		break;
	case BLKIO_STAT_WRITE:
		strlcat(str, " Write", chars_left);
		break;
	case BLKIO_STAT_SYNC:
		strlcat(str, " Sync", chars_left);
		break;
	case BLKIO_STAT_ASYNC:
		strlcat(str, " Async", chars_left);
		break;
	case BLKIO_STAT_TOTAL:
		strlcat(str, " Total", chars_left);
		break;
	default:
		strlcat(str, " Invalid", chars_left);
	}
}

static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, const char *dname)
{
	blkio_get_key_name(0, dname, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}

static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid,
			enum stat_type_cpu type, enum stat_sub_type sub_type)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	int cpu;
	struct blkio_group_stats_cpu *stats_cpu;
	u64 val = 0, tval;

	for_each_possible_cpu(cpu) {
		unsigned int start;
		stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu);

		do {
			start = u64_stats_fetch_begin(&stats_cpu->syncp);
			if (type == BLKIO_STAT_CPU_SECTORS)
				tval = stats_cpu->sectors;
			else
				tval = stats_cpu->stat_arr_cpu[type][sub_type];
		} while (u64_stats_fetch_retry(&stats_cpu->syncp, start));

		val += tval;
	}

	return val;
}

static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid,
				   struct cgroup_map_cb *cb, const char *dname,
				   enum stat_type_cpu type)
{
	uint64_t disk_total, val;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_CPU_SECTORS) {
		val = blkio_read_stat_cpu(blkg, plid, type, 0);
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb,
				       dname);
	}

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
				   false);
		val = blkio_read_stat_cpu(blkg, plid, type, sub_type);
		cb->fill(cb, key_str, val);
	}

	disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) +
		     blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE);

	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
			   false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

/* This should be called with blkg->stats_lock held */
static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid,
			       struct cgroup_map_cb *cb, const char *dname,
			       enum stat_type type)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	uint64_t disk_total;
	char key_str[MAX_KEY_LEN];
	enum stat_sub_type sub_type;

	if (type == BLKIO_STAT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.time, cb, dname);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.unaccounted_time, cb, dname);
	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
		uint64_t sum = pd->stats.avg_queue_size_sum;
		uint64_t samples = pd->stats.avg_queue_size_samples;
		if (samples)
			do_div(sum, samples);
		else
			sum = 0;
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
				       sum, cb, dname);
	}
	if (type == BLKIO_STAT_GROUP_WAIT_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.group_wait_time, cb, dname);
	if (type == BLKIO_STAT_IDLE_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.idle_time, cb, dname);
	if (type == BLKIO_STAT_EMPTY_TIME)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.empty_time, cb, dname);
	if (type == BLKIO_STAT_DEQUEUE)
		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
					pd->stats.dequeue, cb, dname);
#endif

	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
			sub_type++) {
		blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN,
				   false);
		cb->fill(cb, key_str, pd->stats.stat_arr[type][sub_type]);
	}
	disk_total = pd->stats.stat_arr[type][BLKIO_STAT_READ] +
			pd->stats.stat_arr[type][BLKIO_STAT_WRITE];
	blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN,
			   false);
	cb->fill(cb, key_str, disk_total);
	return disk_total;
}

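/*
 * Parse a "major:minor value" string written to a proportional-weight or
 * throttling cgroup file and apply it to the matching blkg, creating the
 * blkg if necessary.
 */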
static int blkio_policy_parse_and_set(char *buf, enum blkio_policy_id plid,
				      int fileid, struct blkio_cgroup *blkcg)
{
	struct gendisk *disk = NULL;
	struct blkio_group *blkg = NULL;
	struct blkg_policy_data *pd;
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	unsigned long major, minor;
	int i = 0, ret = -EINVAL;
	int part;
	dev_t dev;
	u64 temp;

	memset(s, 0, sizeof(s));

	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Prevent from inputting too many things */
		if (i == 3)
			break;
	}

	if (i != 2)
		goto out;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		goto out;

	minor_s = s[0];
	if (!minor_s)
		goto out;

	if (strict_strtoul(major_s, 10, &major))
		goto out;

	if (strict_strtoul(minor_s, 10, &minor))
		goto out;

	dev = MKDEV(major, minor);

	if (strict_strtoull(s[1], 10, &temp))
		goto out;

	disk = get_gendisk(dev, &part);
	if (!disk || part)
		goto out;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto out_unlock;
	}

	pd = blkg->pd[plid];

	switch (plid) {
	case BLKIO_POLICY_PROP:
		if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
		     temp > BLKIO_WEIGHT_MAX)
			goto out_unlock;

		pd->conf.weight = temp;
		blkio_update_group_weight(blkg, plid, temp ?: blkcg->weight);
		break;
	case BLKIO_POLICY_THROTL:
		switch (fileid) {
		case BLKIO_THROTL_read_bps_device:
			pd->conf.bps[READ] = temp;
			blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_write_bps_device:
			pd->conf.bps[WRITE] = temp;
			blkio_update_group_bps(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_read_iops_device:
			if (temp > THROTL_IOPS_MAX)
				goto out_unlock;
			pd->conf.iops[READ] = temp;
			blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
			break;
		case BLKIO_THROTL_write_iops_device:
			if (temp > THROTL_IOPS_MAX)
				goto out_unlock;
			pd->conf.iops[WRITE] = temp;
			blkio_update_group_iops(blkg, plid, temp ?: -1, fileid);
			break;
		}
		break;
	default:
		BUG();
	}
	ret = 0;
out_unlock:
	rcu_read_unlock();
out:
	put_disk(disk);

	/*
	 * If queue was bypassing, we should retry.  Do so after a short
	 * msleep().  It isn't strictly necessary but queue can be
	 * bypassing for some time and it's always nice to avoid busy
	 * looping.
	 */
	if (ret == -EBUSY) {
		msleep(10);
		return restart_syscall();
	}
	return ret;
}

static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
			      const char *buffer)
{
	int ret = 0;
	char *buf;
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
	int fileid = BLKIOFILE_ATTR(cft->private);

	buf = kstrdup(buffer, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = blkio_policy_parse_and_set(buf, plid, fileid, blkcg);
	kfree(buf);
	return ret;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

Tejun Heo4bfd4822012-03-05 13:15:08 -08001138static void blkio_print_group_conf(struct cftype *cft, struct blkio_group *blkg,
1139 struct seq_file *m)
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001140{
Tejun Heoc1768262012-03-05 13:15:17 -08001141 int plid = BLKIOFILE_POLICY(cft->private);
Tejun Heo4bfd4822012-03-05 13:15:08 -08001142 int fileid = BLKIOFILE_ATTR(cft->private);
Tejun Heoc1768262012-03-05 13:15:17 -08001143 struct blkg_policy_data *pd = blkg->pd[plid];
1144 const char *dname = blkg_dev_name(blkg);
Tejun Heo4bfd4822012-03-05 13:15:08 -08001145 int rw = WRITE;
1146
Vivek Goyal92616b52012-03-05 13:15:10 -08001147 if (!dname)
1148 return;
1149
Tejun Heoc1768262012-03-05 13:15:17 -08001150 switch (plid) {
Vivek Goyal062a6442010-09-15 17:06:33 -04001151 case BLKIO_POLICY_PROP:
Tejun Heo549d3aa2012-03-05 13:15:16 -08001152 if (pd->conf.weight)
Tejun Heo7a4dd282012-03-05 13:15:09 -08001153 seq_printf(m, "%s\t%u\n",
Tejun Heo549d3aa2012-03-05 13:15:16 -08001154 dname, pd->conf.weight);
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001155 break;
1156 case BLKIO_POLICY_THROTL:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001157 switch (fileid) {
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001158 case BLKIO_THROTL_read_bps_device:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001159 rw = READ;
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001160 case BLKIO_THROTL_write_bps_device:
Tejun Heo549d3aa2012-03-05 13:15:16 -08001161 if (pd->conf.bps[rw])
Tejun Heo7a4dd282012-03-05 13:15:09 -08001162 seq_printf(m, "%s\t%llu\n",
Tejun Heo549d3aa2012-03-05 13:15:16 -08001163 dname, pd->conf.bps[rw]);
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001164 break;
1165 case BLKIO_THROTL_read_iops_device:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001166 rw = READ;
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001167 case BLKIO_THROTL_write_iops_device:
Tejun Heo549d3aa2012-03-05 13:15:16 -08001168 if (pd->conf.iops[rw])
Tejun Heo7a4dd282012-03-05 13:15:09 -08001169 seq_printf(m, "%s\t%u\n",
Tejun Heo549d3aa2012-03-05 13:15:16 -08001170 dname, pd->conf.iops[rw]);
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001171 break;
1172 }
Vivek Goyal062a6442010-09-15 17:06:33 -04001173 break;
1174 default:
1175 BUG();
1176 }
1177}
1178
1179/* cgroup files which read their data from policy nodes end up here */
Tejun Heo4bfd4822012-03-05 13:15:08 -08001180static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg,
1181 struct seq_file *m)
Vivek Goyal062a6442010-09-15 17:06:33 -04001182{
Tejun Heo4bfd4822012-03-05 13:15:08 -08001183 struct blkio_group *blkg;
1184 struct hlist_node *n;
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001185
Tejun Heo4bfd4822012-03-05 13:15:08 -08001186 spin_lock_irq(&blkcg->lock);
1187 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
Tejun Heoe8989fa2012-03-05 13:15:20 -08001188 blkio_print_group_conf(cft, blkg, m);
Tejun Heo4bfd4822012-03-05 13:15:08 -08001189 spin_unlock_irq(&blkcg->lock);
Vivek Goyal062a6442010-09-15 17:06:33 -04001190}
1191
1192static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1193 struct seq_file *m)
1194{
1195 struct blkio_cgroup *blkcg;
1196 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1197 int name = BLKIOFILE_ATTR(cft->private);
1198
1199 blkcg = cgroup_to_blkio_cgroup(cgrp);
1200
 1201 switch (plid) {
 1202 case BLKIO_POLICY_PROP:
 1203 switch (name) {
1204 case BLKIO_PROP_weight_device:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001205 blkio_read_conf(cft, blkcg, m);
Vivek Goyal062a6442010-09-15 17:06:33 -04001206 return 0;
1207 default:
1208 BUG();
1209 }
1210 break;
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001211 case BLKIO_POLICY_THROTL:
 1212 switch (name) {
1213 case BLKIO_THROTL_read_bps_device:
1214 case BLKIO_THROTL_write_bps_device:
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001215 case BLKIO_THROTL_read_iops_device:
1216 case BLKIO_THROTL_write_iops_device:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001217 blkio_read_conf(cft, blkcg, m);
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001218 return 0;
1219 default:
1220 BUG();
1221 }
1222 break;
Vivek Goyal062a6442010-09-15 17:06:33 -04001223 default:
1224 BUG();
1225 }
1226
1227 return 0;
1228}
1229
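/*
 * Walk every group in @blkcg and emit the requested stat through @cb.
 * @pcpu selects the per-cpu stat variants; when @show_total is set a
 * final "Total" entry summing the whole cgroup is appended.
 */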
1230static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001231 struct cftype *cft, struct cgroup_map_cb *cb,
1232 enum stat_type type, bool show_total, bool pcpu)
Vivek Goyal062a6442010-09-15 17:06:33 -04001233{
1234 struct blkio_group *blkg;
1235 struct hlist_node *n;
1236 uint64_t cgroup_total = 0;
1237
Tejun Heoc875f4d2012-03-05 13:15:22 -08001238 spin_lock_irq(&blkcg->lock);
1239
1240 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
Vivek Goyal92616b52012-03-05 13:15:10 -08001241 const char *dname = blkg_dev_name(blkg);
Tejun Heoc1768262012-03-05 13:15:17 -08001242 int plid = BLKIOFILE_POLICY(cft->private);
Tejun Heo7a4dd282012-03-05 13:15:09 -08001243
Tejun Heoe8989fa2012-03-05 13:15:20 -08001244 if (!dname)
Tejun Heo7a4dd282012-03-05 13:15:09 -08001245 continue;
Tejun Heoc1768262012-03-05 13:15:17 -08001246 if (pcpu) {
1247 cgroup_total += blkio_get_stat_cpu(blkg, plid,
1248 cb, dname, type);
1249 } else {
Tejun Heoc875f4d2012-03-05 13:15:22 -08001250 spin_lock(&blkg->stats_lock);
Tejun Heoc1768262012-03-05 13:15:17 -08001251 cgroup_total += blkio_get_stat(blkg, plid,
1252 cb, dname, type);
Tejun Heoc875f4d2012-03-05 13:15:22 -08001253 spin_unlock(&blkg->stats_lock);
Vivek Goyal062a6442010-09-15 17:06:33 -04001254 }
1255 }
1256 if (show_total)
1257 cb->fill(cb, "Total", cgroup_total);
Tejun Heoc875f4d2012-03-05 13:15:22 -08001258
1259 spin_unlock_irq(&blkcg->lock);
Vivek Goyal062a6442010-09-15 17:06:33 -04001260 return 0;
1261}
1262
 1263/* All map-type cgroup files are serviced by this function */
1264static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1265 struct cgroup_map_cb *cb)
1266{
1267 struct blkio_cgroup *blkcg;
1268 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1269 int name = BLKIOFILE_ATTR(cft->private);
1270
1271 blkcg = cgroup_to_blkio_cgroup(cgrp);
1272
 1273 switch (plid) {
 1274 case BLKIO_POLICY_PROP:
 1275 switch (name) {
1276 case BLKIO_PROP_time:
1277 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001278 BLKIO_STAT_TIME, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001279 case BLKIO_PROP_sectors:
1280 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001281 BLKIO_STAT_CPU_SECTORS, 0, 1);
Vivek Goyal062a6442010-09-15 17:06:33 -04001282 case BLKIO_PROP_io_service_bytes:
1283 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001284 BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
Vivek Goyal062a6442010-09-15 17:06:33 -04001285 case BLKIO_PROP_io_serviced:
1286 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001287 BLKIO_STAT_CPU_SERVICED, 1, 1);
Vivek Goyal062a6442010-09-15 17:06:33 -04001288 case BLKIO_PROP_io_service_time:
1289 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001290 BLKIO_STAT_SERVICE_TIME, 1, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001291 case BLKIO_PROP_io_wait_time:
1292 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001293 BLKIO_STAT_WAIT_TIME, 1, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001294 case BLKIO_PROP_io_merged:
1295 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal317389a2011-05-23 10:02:19 +02001296 BLKIO_STAT_CPU_MERGED, 1, 1);
Vivek Goyal062a6442010-09-15 17:06:33 -04001297 case BLKIO_PROP_io_queued:
1298 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001299 BLKIO_STAT_QUEUED, 1, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001300#ifdef CONFIG_DEBUG_BLK_CGROUP
Justin TerAvest9026e522011-03-22 21:26:54 +01001301 case BLKIO_PROP_unaccounted_time:
1302 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001303 BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001304 case BLKIO_PROP_dequeue:
1305 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001306 BLKIO_STAT_DEQUEUE, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001307 case BLKIO_PROP_avg_queue_size:
1308 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001309 BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001310 case BLKIO_PROP_group_wait_time:
1311 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001312 BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001313 case BLKIO_PROP_idle_time:
1314 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001315 BLKIO_STAT_IDLE_TIME, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001316 case BLKIO_PROP_empty_time:
1317 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001318 BLKIO_STAT_EMPTY_TIME, 0, 0);
Vivek Goyal062a6442010-09-15 17:06:33 -04001319#endif
1320 default:
1321 BUG();
1322 }
1323 break;
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001324 case BLKIO_POLICY_THROTL:
 1325 switch (name) {
1326 case BLKIO_THROTL_io_service_bytes:
1327 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001328 BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001329 case BLKIO_THROTL_io_serviced:
1330 return blkio_read_blkg_stats(blkcg, cft, cb,
Vivek Goyal5624a4e2011-05-19 15:38:28 -04001331 BLKIO_STAT_CPU_SERVICED, 1, 1);
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001332 default:
1333 BUG();
1334 }
1335 break;
Vivek Goyal062a6442010-09-15 17:06:33 -04001336 default:
1337 BUG();
1338 }
1339
1340 return 0;
1341}
1342
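/*
 * Update the cgroup-wide default weight.  Groups that carry a per-device
 * override (pd->conf.weight set through blkio.weight_device) are left
 * untouched; every other group is pushed the new default.
 */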
Tejun Heo4bfd4822012-03-05 13:15:08 -08001343static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val)
Vivek Goyal062a6442010-09-15 17:06:33 -04001344{
1345 struct blkio_group *blkg;
1346 struct hlist_node *n;
Vivek Goyal062a6442010-09-15 17:06:33 -04001347
1348 if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1349 return -EINVAL;
1350
1351 spin_lock(&blkio_list_lock);
1352 spin_lock_irq(&blkcg->lock);
1353 blkcg->weight = (unsigned int)val;
1354
Tejun Heo549d3aa2012-03-05 13:15:16 -08001355 hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
Tejun Heoe8989fa2012-03-05 13:15:20 -08001356 struct blkg_policy_data *pd = blkg->pd[plid];
Tejun Heo549d3aa2012-03-05 13:15:16 -08001357
Tejun Heoe8989fa2012-03-05 13:15:20 -08001358 if (!pd->conf.weight)
Tejun Heoc1768262012-03-05 13:15:17 -08001359 blkio_update_group_weight(blkg, plid, blkcg->weight);
Tejun Heo549d3aa2012-03-05 13:15:16 -08001360 }
Vivek Goyal062a6442010-09-15 17:06:33 -04001361
Vivek Goyal062a6442010-09-15 17:06:33 -04001362 spin_unlock_irq(&blkcg->lock);
1363 spin_unlock(&blkio_list_lock);
1364 return 0;
1365}
1366
 1367static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1368 struct blkio_cgroup *blkcg;
1369 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1370 int name = BLKIOFILE_ATTR(cft->private);
1371
1372 blkcg = cgroup_to_blkio_cgroup(cgrp);
1373
 1374 switch (plid) {
 1375 case BLKIO_POLICY_PROP:
 1376 switch (name) {
1377 case BLKIO_PROP_weight:
1378 return (u64)blkcg->weight;
1379 }
1380 break;
1381 default:
1382 BUG();
1383 }
1384 return 0;
1385}
1386
1387static int
1388blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1389{
1390 struct blkio_cgroup *blkcg;
1391 enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1392 int name = BLKIOFILE_ATTR(cft->private);
1393
1394 blkcg = cgroup_to_blkio_cgroup(cgrp);
1395
 1396 switch (plid) {
 1397 case BLKIO_POLICY_PROP:
 1398 switch (name) {
1399 case BLKIO_PROP_weight:
Tejun Heo4bfd4822012-03-05 13:15:08 -08001400 return blkio_weight_write(blkcg, plid, val);
Vivek Goyal062a6442010-09-15 17:06:33 -04001401 }
1402 break;
1403 default:
1404 BUG();
1405 }
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001406
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001407 return 0;
1408}
1409
Vivek Goyal31e4c282009-12-03 12:59:42 -05001410struct cftype blkio_files[] = {
1411 {
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001412 .name = "weight_device",
Vivek Goyal062a6442010-09-15 17:06:33 -04001413 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1414 BLKIO_PROP_weight_device),
1415 .read_seq_string = blkiocg_file_read,
1416 .write_string = blkiocg_file_write,
Gui Jianfeng34d0f172010-04-13 16:05:49 +08001417 .max_write_len = 256,
1418 },
1419 {
Vivek Goyal31e4c282009-12-03 12:59:42 -05001420 .name = "weight",
Vivek Goyal062a6442010-09-15 17:06:33 -04001421 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1422 BLKIO_PROP_weight),
1423 .read_u64 = blkiocg_file_read_u64,
1424 .write_u64 = blkiocg_file_write_u64,
Vivek Goyal31e4c282009-12-03 12:59:42 -05001425 },
Vivek Goyal22084192009-12-03 12:59:49 -05001426 {
1427 .name = "time",
Vivek Goyal13f98252010-10-01 14:49:41 +02001428 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1429 BLKIO_PROP_time),
1430 .read_map = blkiocg_file_read_map,
Vivek Goyal22084192009-12-03 12:59:49 -05001431 },
1432 {
1433 .name = "sectors",
Vivek Goyal13f98252010-10-01 14:49:41 +02001434 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1435 BLKIO_PROP_sectors),
1436 .read_map = blkiocg_file_read_map,
Divyesh Shah303a3ac2010-04-01 15:01:24 -07001437 },
1438 {
1439 .name = "io_service_bytes",
Vivek Goyal13f98252010-10-01 14:49:41 +02001440 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1441 BLKIO_PROP_io_service_bytes),
1442 .read_map = blkiocg_file_read_map,
Divyesh Shah303a3ac2010-04-01 15:01:24 -07001443 },
1444 {
1445 .name = "io_serviced",
Vivek Goyal13f98252010-10-01 14:49:41 +02001446 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1447 BLKIO_PROP_io_serviced),
1448 .read_map = blkiocg_file_read_map,
Divyesh Shah303a3ac2010-04-01 15:01:24 -07001449 },
1450 {
1451 .name = "io_service_time",
Vivek Goyal13f98252010-10-01 14:49:41 +02001452 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1453 BLKIO_PROP_io_service_time),
1454 .read_map = blkiocg_file_read_map,
Divyesh Shah303a3ac2010-04-01 15:01:24 -07001455 },
1456 {
1457 .name = "io_wait_time",
Vivek Goyal13f98252010-10-01 14:49:41 +02001458 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1459 BLKIO_PROP_io_wait_time),
1460 .read_map = blkiocg_file_read_map,
Divyesh Shah84c124d2010-04-09 08:31:19 +02001461 },
1462 {
Divyesh Shah812d4022010-04-08 21:14:23 -07001463 .name = "io_merged",
Vivek Goyal13f98252010-10-01 14:49:41 +02001464 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1465 BLKIO_PROP_io_merged),
1466 .read_map = blkiocg_file_read_map,
Divyesh Shah812d4022010-04-08 21:14:23 -07001467 },
1468 {
Divyesh Shahcdc11842010-04-08 21:15:10 -07001469 .name = "io_queued",
Vivek Goyal13f98252010-10-01 14:49:41 +02001470 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1471 BLKIO_PROP_io_queued),
1472 .read_map = blkiocg_file_read_map,
Divyesh Shahcdc11842010-04-08 21:15:10 -07001473 },
1474 {
Divyesh Shah84c124d2010-04-09 08:31:19 +02001475 .name = "reset_stats",
1476 .write_u64 = blkiocg_reset_stats,
Vivek Goyal22084192009-12-03 12:59:49 -05001477 },
Vivek Goyal13f98252010-10-01 14:49:41 +02001478#ifdef CONFIG_BLK_DEV_THROTTLING
1479 {
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001480 .name = "throttle.read_bps_device",
1481 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1482 BLKIO_THROTL_read_bps_device),
1483 .read_seq_string = blkiocg_file_read,
1484 .write_string = blkiocg_file_write,
1485 .max_write_len = 256,
1486 },
1487
1488 {
1489 .name = "throttle.write_bps_device",
1490 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1491 BLKIO_THROTL_write_bps_device),
1492 .read_seq_string = blkiocg_file_read,
1493 .write_string = blkiocg_file_write,
1494 .max_write_len = 256,
1495 },
Vivek Goyal7702e8f2010-09-15 17:06:36 -04001496
1497 {
1498 .name = "throttle.read_iops_device",
1499 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1500 BLKIO_THROTL_read_iops_device),
1501 .read_seq_string = blkiocg_file_read,
1502 .write_string = blkiocg_file_write,
1503 .max_write_len = 256,
1504 },
1505
1506 {
1507 .name = "throttle.write_iops_device",
1508 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1509 BLKIO_THROTL_write_iops_device),
1510 .read_seq_string = blkiocg_file_read,
1511 .write_string = blkiocg_file_write,
1512 .max_write_len = 256,
1513 },
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001514 {
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001515 .name = "throttle.io_service_bytes",
1516 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1517 BLKIO_THROTL_io_service_bytes),
1518 .read_map = blkiocg_file_read_map,
1519 },
1520 {
Vivek Goyal4c9eefa2010-09-15 17:06:34 -04001521 .name = "throttle.io_serviced",
1522 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1523 BLKIO_THROTL_io_serviced),
1524 .read_map = blkiocg_file_read_map,
1525 },
Vivek Goyal13f98252010-10-01 14:49:41 +02001526#endif /* CONFIG_BLK_DEV_THROTTLING */
1527
Vivek Goyal22084192009-12-03 12:59:49 -05001528#ifdef CONFIG_DEBUG_BLK_CGROUP
Divyesh Shahcdc11842010-04-08 21:15:10 -07001529 {
1530 .name = "avg_queue_size",
Vivek Goyal062a6442010-09-15 17:06:33 -04001531 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1532 BLKIO_PROP_avg_queue_size),
1533 .read_map = blkiocg_file_read_map,
Divyesh Shahcdc11842010-04-08 21:15:10 -07001534 },
1535 {
Divyesh Shah812df482010-04-08 21:15:35 -07001536 .name = "group_wait_time",
Vivek Goyal062a6442010-09-15 17:06:33 -04001537 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1538 BLKIO_PROP_group_wait_time),
1539 .read_map = blkiocg_file_read_map,
Divyesh Shah812df482010-04-08 21:15:35 -07001540 },
1541 {
1542 .name = "idle_time",
Vivek Goyal062a6442010-09-15 17:06:33 -04001543 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1544 BLKIO_PROP_idle_time),
1545 .read_map = blkiocg_file_read_map,
Divyesh Shah812df482010-04-08 21:15:35 -07001546 },
1547 {
1548 .name = "empty_time",
Vivek Goyal062a6442010-09-15 17:06:33 -04001549 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1550 BLKIO_PROP_empty_time),
1551 .read_map = blkiocg_file_read_map,
Divyesh Shah812df482010-04-08 21:15:35 -07001552 },
1553 {
Vivek Goyal22084192009-12-03 12:59:49 -05001554 .name = "dequeue",
Vivek Goyal062a6442010-09-15 17:06:33 -04001555 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1556 BLKIO_PROP_dequeue),
1557 .read_map = blkiocg_file_read_map,
Divyesh Shahcdc11842010-04-08 21:15:10 -07001558 },
Justin TerAvest9026e522011-03-22 21:26:54 +01001559 {
1560 .name = "unaccounted_time",
1561 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1562 BLKIO_PROP_unaccounted_time),
1563 .read_map = blkiocg_file_read_map,
1564 },
Vivek Goyal22084192009-12-03 12:59:49 -05001565#endif
Vivek Goyal31e4c282009-12-03 12:59:42 -05001566};
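/*
 * Illustrative userspace sketch (not part of this file) of how the cgroup
 * files declared above are typically driven.  The mount point and group
 * name below are assumptions for the example; the file names map to the
 * blkiocg_file_write_u64() and blkiocg_file_write() handlers above.
 */
#include <stdio.h>

static int example_configure_blkio(void)
{
	const char *grp = "/sys/fs/cgroup/blkio/example";	/* assumed path */
	char path[256];
	FILE *f;

	/* proportional weight: plain u64, validated against BLKIO_WEIGHT_MIN/MAX */
	snprintf(path, sizeof(path), "%s/blkio.weight", grp);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "500\n");
	fclose(f);

	/* throttling: "<major>:<minor> <bytes per second>" string write */
	snprintf(path, sizeof(path), "%s/blkio.throttle.read_bps_device", grp);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "8:16 1048576\n");
	fclose(f);
	return 0;
}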
1567
1568static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1569{
1570 return cgroup_add_files(cgroup, subsys, blkio_files,
1571 ARRAY_SIZE(blkio_files));
1572}
1573
Tejun Heo9f13ef62012-03-05 13:15:21 -08001574/**
1575 * blkiocg_pre_destroy - cgroup pre_destroy callback
1576 * @subsys: cgroup subsys
1577 * @cgroup: cgroup of interest
1578 *
1579 * This function is called when @cgroup is about to go away and responsible
1580 * for shooting down all blkgs associated with @cgroup. blkgs should be
1581 * removed while holding both q and blkcg locks. As blkcg lock is nested
1582 * inside q lock, this function performs reverse double lock dancing.
1583 *
1584 * This is the blkcg counterpart of ioc_release_fn().
1585 */
Tejun Heo7ee9c562012-03-05 13:15:11 -08001586static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
1587 struct cgroup *cgroup)
Vivek Goyal31e4c282009-12-03 12:59:42 -05001588{
1589 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1590
Tejun Heo9f13ef62012-03-05 13:15:21 -08001591 spin_lock_irq(&blkcg->lock);
Tejun Heo7ee9c562012-03-05 13:15:11 -08001592
Tejun Heo9f13ef62012-03-05 13:15:21 -08001593 while (!hlist_empty(&blkcg->blkg_list)) {
1594 struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
1595 struct blkio_group, blkcg_node);
Tejun Heoc875f4d2012-03-05 13:15:22 -08001596 struct request_queue *q = blkg->q;
Vivek Goyalb1c35762009-12-03 12:59:47 -05001597
Tejun Heo9f13ef62012-03-05 13:15:21 -08001598 if (spin_trylock(q->queue_lock)) {
1599 blkg_destroy(blkg);
1600 spin_unlock(q->queue_lock);
1601 } else {
1602 spin_unlock_irq(&blkcg->lock);
Tejun Heo9f13ef62012-03-05 13:15:21 -08001603 cpu_relax();
Tejun Heo9f13ef62012-03-05 13:15:21 -08001604 spin_lock_irq(&blkcg->lock);
Jens Axboe0f3942a2010-05-03 14:28:55 +02001605 }
Tejun Heo9f13ef62012-03-05 13:15:21 -08001606 }
Jens Axboe0f3942a2010-05-03 14:28:55 +02001607
Tejun Heo9f13ef62012-03-05 13:15:21 -08001608 spin_unlock_irq(&blkcg->lock);
Tejun Heo7ee9c562012-03-05 13:15:11 -08001609 return 0;
1610}
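/*
 * Userspace analogue (illustrative only, all names invented for the sketch)
 * of the reverse double lock dancing above: the queue lock is normally the
 * outer lock with blkcg->lock nested inside it, so when blkcg->lock has to
 * be taken first we only ever trylock the queue side and back off on
 * contention.
 */
#include <pthread.h>
#include <sched.h>

static pthread_mutex_t blkcg_lock = PTHREAD_MUTEX_INITIALIZER;	/* inner lock */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;	/* outer lock */

static void destroy_group(void) { /* stand-in for blkg_destroy() */ }

static void pre_destroy_analogue(int ngroups)
{
	pthread_mutex_lock(&blkcg_lock);
	while (ngroups > 0) {
		if (pthread_mutex_trylock(&queue_lock) == 0) {
			destroy_group();		/* both locks held */
			pthread_mutex_unlock(&queue_lock);
			ngroups--;
		} else {
			/* blocking here could deadlock; drop, breathe, retry */
			pthread_mutex_unlock(&blkcg_lock);
			sched_yield();
			pthread_mutex_lock(&blkcg_lock);
		}
	}
	pthread_mutex_unlock(&blkcg_lock);
}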
1611
1612static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1613{
1614 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1615
Ben Blum67523c42010-03-10 15:22:11 -08001616 if (blkcg != &blkio_root_cgroup)
1617 kfree(blkcg);
Vivek Goyal31e4c282009-12-03 12:59:42 -05001618}
1619
1620static struct cgroup_subsys_state *
1621blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1622{
Li Zefan03415092010-05-07 08:57:00 +02001623 struct blkio_cgroup *blkcg;
1624 struct cgroup *parent = cgroup->parent;
Vivek Goyal31e4c282009-12-03 12:59:42 -05001625
Li Zefan03415092010-05-07 08:57:00 +02001626 if (!parent) {
Vivek Goyal31e4c282009-12-03 12:59:42 -05001627 blkcg = &blkio_root_cgroup;
1628 goto done;
1629 }
1630
Vivek Goyal31e4c282009-12-03 12:59:42 -05001631 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1632 if (!blkcg)
1633 return ERR_PTR(-ENOMEM);
1634
1635 blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1636done:
1637 spin_lock_init(&blkcg->lock);
1638 INIT_HLIST_HEAD(&blkcg->blkg_list);
1639
1640 return &blkcg->css;
1641}
1642
Tejun Heo5efd6112012-03-05 13:15:12 -08001643/**
1644 * blkcg_init_queue - initialize blkcg part of request queue
1645 * @q: request_queue to initialize
1646 *
1647 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
1648 * part of new request_queue @q.
1649 *
1650 * RETURNS:
1651 * 0 on success, -errno on failure.
1652 */
1653int blkcg_init_queue(struct request_queue *q)
1654{
Tejun Heo923adde2012-03-05 13:15:13 -08001655 int ret;
1656
Tejun Heo5efd6112012-03-05 13:15:12 -08001657 might_sleep();
1658
Tejun Heo923adde2012-03-05 13:15:13 -08001659 ret = blk_throtl_init(q);
1660 if (ret)
1661 return ret;
1662
	/* make @q reachable from blkcg_bypass_start/end() and policy updates */
 1663 mutex_lock(&all_q_mutex);
1664 INIT_LIST_HEAD(&q->all_q_node);
1665 list_add_tail(&q->all_q_node, &all_q_list);
1666 mutex_unlock(&all_q_mutex);
1667
1668 return 0;
Tejun Heo5efd6112012-03-05 13:15:12 -08001669}
1670
1671/**
1672 * blkcg_drain_queue - drain blkcg part of request_queue
1673 * @q: request_queue to drain
1674 *
1675 * Called from blk_drain_queue(). Responsible for draining blkcg part.
1676 */
1677void blkcg_drain_queue(struct request_queue *q)
1678{
1679 lockdep_assert_held(q->queue_lock);
1680
1681 blk_throtl_drain(q);
1682}
1683
1684/**
1685 * blkcg_exit_queue - exit and release blkcg part of request_queue
1686 * @q: request_queue being released
1687 *
1688 * Called from blk_release_queue(). Responsible for exiting blkcg part.
1689 */
1690void blkcg_exit_queue(struct request_queue *q)
1691{
Tejun Heo923adde2012-03-05 13:15:13 -08001692 mutex_lock(&all_q_mutex);
1693 list_del_init(&q->all_q_node);
1694 mutex_unlock(&all_q_mutex);
1695
Tejun Heoe8989fa2012-03-05 13:15:20 -08001696 blkg_destroy_all(q, true);
1697
Tejun Heo5efd6112012-03-05 13:15:12 -08001698 blk_throtl_exit(q);
1699}
1700
Vivek Goyal31e4c282009-12-03 12:59:42 -05001701/*
 1702 * We cannot support shared io contexts, as we have no means to support
1703 * two tasks with the same ioc in two different groups without major rework
1704 * of the main cic data structures. For now we allow a task to change
1705 * its cgroup only if it's the only owner of its ioc.
1706 */
Tejun Heobb9d97b2011-12-12 18:12:21 -08001707static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1708 struct cgroup_taskset *tset)
Vivek Goyal31e4c282009-12-03 12:59:42 -05001709{
Tejun Heobb9d97b2011-12-12 18:12:21 -08001710 struct task_struct *task;
Vivek Goyal31e4c282009-12-03 12:59:42 -05001711 struct io_context *ioc;
1712 int ret = 0;
1713
1714 /* task_lock() is needed to avoid races with exit_io_context() */
Tejun Heobb9d97b2011-12-12 18:12:21 -08001715 cgroup_taskset_for_each(task, cgrp, tset) {
1716 task_lock(task);
1717 ioc = task->io_context;
1718 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1719 ret = -EINVAL;
1720 task_unlock(task);
1721 if (ret)
1722 break;
1723 }
Vivek Goyal31e4c282009-12-03 12:59:42 -05001724 return ret;
1725}
1726
Tejun Heobb9d97b2011-12-12 18:12:21 -08001727static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1728 struct cgroup_taskset *tset)
Vivek Goyal31e4c282009-12-03 12:59:42 -05001729{
Tejun Heobb9d97b2011-12-12 18:12:21 -08001730 struct task_struct *task;
Vivek Goyal31e4c282009-12-03 12:59:42 -05001731 struct io_context *ioc;
1732
Tejun Heobb9d97b2011-12-12 18:12:21 -08001733 cgroup_taskset_for_each(task, cgrp, tset) {
Linus Torvaldsb3c9dd12012-01-15 12:24:45 -08001734 /* we don't lose anything even if ioc allocation fails */
1735 ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
1736 if (ioc) {
1737 ioc_cgroup_changed(ioc);
Tejun Heo11a31222012-02-07 07:51:30 +01001738 put_io_context(ioc);
Linus Torvaldsb3c9dd12012-01-15 12:24:45 -08001739 }
Tejun Heobb9d97b2011-12-12 18:12:21 -08001740 }
Vivek Goyal31e4c282009-12-03 12:59:42 -05001741}
1742
Tejun Heo923adde2012-03-05 13:15:13 -08001743static void blkcg_bypass_start(void)
1744 __acquires(&all_q_mutex)
1745{
1746 struct request_queue *q;
1747
1748 mutex_lock(&all_q_mutex);
1749
1750 list_for_each_entry(q, &all_q_list, all_q_node) {
1751 blk_queue_bypass_start(q);
Tejun Heoe8989fa2012-03-05 13:15:20 -08001752 blkg_destroy_all(q, false);
Tejun Heo923adde2012-03-05 13:15:13 -08001753 }
1754}
1755
1756static void blkcg_bypass_end(void)
1757 __releases(&all_q_mutex)
1758{
1759 struct request_queue *q;
1760
1761 list_for_each_entry(q, &all_q_list, all_q_node)
1762 blk_queue_bypass_end(q);
1763
1764 mutex_unlock(&all_q_mutex);
1765}
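/*
 * The two helpers above bracket policy (un)registration: every queue on
 * all_q_list is put into bypass mode and blkg_destroy_all() clears its
 * existing groups before the blkio_policy[] table and the root groups'
 * policy data are updated.
 */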
1766
Vivek Goyal3e252062009-12-04 10:36:42 -05001767void blkio_policy_register(struct blkio_policy_type *blkiop)
1768{
Tejun Heoe8989fa2012-03-05 13:15:20 -08001769 struct request_queue *q;
1770
Tejun Heo923adde2012-03-05 13:15:13 -08001771 blkcg_bypass_start();
Vivek Goyal3e252062009-12-04 10:36:42 -05001772 spin_lock(&blkio_list_lock);
Tejun Heo035d10b2012-03-05 13:15:04 -08001773
1774 BUG_ON(blkio_policy[blkiop->plid]);
1775 blkio_policy[blkiop->plid] = blkiop;
Vivek Goyal3e252062009-12-04 10:36:42 -05001776 list_add_tail(&blkiop->list, &blkio_list);
Tejun Heo035d10b2012-03-05 13:15:04 -08001777
Vivek Goyal3e252062009-12-04 10:36:42 -05001778 spin_unlock(&blkio_list_lock);
Tejun Heoe8989fa2012-03-05 13:15:20 -08001779 list_for_each_entry(q, &all_q_list, all_q_node)
1780 update_root_blkg_pd(q, blkiop->plid);
Tejun Heo923adde2012-03-05 13:15:13 -08001781 blkcg_bypass_end();
Vivek Goyal3e252062009-12-04 10:36:42 -05001782}
1783EXPORT_SYMBOL_GPL(blkio_policy_register);
1784
1785void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1786{
Tejun Heoe8989fa2012-03-05 13:15:20 -08001787 struct request_queue *q;
1788
Tejun Heo923adde2012-03-05 13:15:13 -08001789 blkcg_bypass_start();
Vivek Goyal3e252062009-12-04 10:36:42 -05001790 spin_lock(&blkio_list_lock);
Tejun Heo035d10b2012-03-05 13:15:04 -08001791
1792 BUG_ON(blkio_policy[blkiop->plid] != blkiop);
1793 blkio_policy[blkiop->plid] = NULL;
Vivek Goyal3e252062009-12-04 10:36:42 -05001794 list_del_init(&blkiop->list);
Tejun Heo035d10b2012-03-05 13:15:04 -08001795
Vivek Goyal3e252062009-12-04 10:36:42 -05001796 spin_unlock(&blkio_list_lock);
Tejun Heoe8989fa2012-03-05 13:15:20 -08001797 list_for_each_entry(q, &all_q_list, all_q_node)
1798 update_root_blkg_pd(q, blkiop->plid);
Tejun Heo923adde2012-03-05 13:15:13 -08001799 blkcg_bypass_end();
Vivek Goyal3e252062009-12-04 10:36:42 -05001800}
1801EXPORT_SYMBOL_GPL(blkio_policy_unregister);
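
/*
 * Hypothetical out-of-tree sketch (illustration only) of a module driving
 * the two exports above.  Only blkio_policy_register()/unregister() and
 * BLKIO_POLICY_PROP are taken from this file; a real policy would own its
 * own policy id and fill in its ops and per-group data size, and reusing
 * BLKIO_POLICY_PROP here would trip the BUG_ON() in registration.
 */
#include <linux/module.h>
#include "blk-cgroup.h"

static struct blkio_policy_type example_policy = {
	.plid = BLKIO_POLICY_PROP,	/* placeholder id for the sketch */
};

static int __init example_policy_init(void)
{
	blkio_policy_register(&example_policy);
	return 0;
}
module_init(example_policy_init);

static void __exit example_policy_exit(void)
{
	blkio_policy_unregister(&example_policy);
}
module_exit(example_policy_exit);

MODULE_LICENSE("GPL");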