/*
 * BFQ: CGROUPS support.
 *
 * Based on ideas and code from CFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
 *
 * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
 * file.
 */

#ifdef CONFIG_CGROUP_BFQIO
static struct bfqio_cgroup bfqio_root_cgroup = {
        .weight = BFQ_DEFAULT_GRP_WEIGHT,
        .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
        .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
};

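/**
 * bfq_init_entity - initialize an entity inside its bfq_group.
 * @entity: the entity to initialize.
 * @bfqg: the group the entity belongs to.
 *
 * Copy the "new_" scheduling parameters into the current ones and hook
 * the entity to the group's scheduling data.
 */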
static inline void bfq_init_entity(struct bfq_entity *entity,
                                   struct bfq_group *bfqg)
{
        entity->weight = entity->new_weight;
        entity->orig_weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class;
        entity->parent = bfqg->my_entity;
        entity->sched_data = &bfqg->sched_data;
}

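/* Return the bfqio_cgroup embedding the bfqio subsystem state of @cgroup. */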
static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup)
{
        return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id),
                            struct bfqio_cgroup, css);
}

/*
 * Search the hash table (for now just a list) of bgrp for the bfq_group
 * associated with bfqd. Must be called under rcu_read_lock().
 */
static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
                                            struct bfq_data *bfqd)
{
        struct bfq_group *bfqg;
        struct hlist_node *n;
        void *key;

        hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) {
                key = rcu_dereference(bfqg->bfqd);
                if (key == bfqd)
                        return bfqg;
        }

        return NULL;
}

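/*
 * Initialize the entity of a newly-allocated group from the weight,
 * ioprio and ioprio_class currently stored in its bfqio_cgroup.
 */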
static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
                                         struct bfq_group *bfqg)
{
        struct bfq_entity *entity = &bfqg->entity;

        /*
         * If the weight of the entity has never been set via the sysfs
         * interface, then bgrp->weight == 0. In this case we initialize
         * the weight from the current ioprio value. Otherwise, the group
         * weight, if set, has priority over the ioprio value.
         */
        if (bgrp->weight == 0) {
                entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
                entity->new_ioprio = bgrp->ioprio;
        } else {
                if (bgrp->weight < BFQ_MIN_WEIGHT ||
                    bgrp->weight > BFQ_MAX_WEIGHT) {
                        printk(KERN_CRIT "bfq_group_init_entity: "
                               "bgrp->weight %d\n", bgrp->weight);
                        BUG();
                }
                entity->new_weight = bgrp->weight;
                entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
        }
        entity->orig_weight = entity->weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
        entity->my_sched_data = &bfqg->sched_data;
        bfqg->active_entities = 0;
}

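/* Hook @bfqg into the scheduling hierarchy as a child of @parent. */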
static inline void bfq_group_set_parent(struct bfq_group *bfqg,
                                        struct bfq_group *parent)
{
        struct bfq_entity *entity;

        BUG_ON(parent == NULL);
        BUG_ON(bfqg == NULL);

        entity = &bfqg->entity;
        entity->parent = parent->my_entity;
        entity->sched_data = &parent->sched_data;
}

/**
 * bfq_group_chain_alloc - allocate a chain of groups.
 * @bfqd: queue descriptor.
 * @cgroup: the leaf cgroup this chain starts from.
 *
 * Allocate a chain of groups starting from the one belonging to
 * @cgroup up to the root cgroup. Stop if a cgroup on the chain
 * to the root already has an allocated group on @bfqd.
 */
static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
                                               struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp;
        struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;

        for (; cgroup != NULL; cgroup = cgroup->parent) {
                bgrp = cgroup_to_bfqio(cgroup);

                bfqg = bfqio_lookup_group(bgrp, bfqd);
                if (bfqg != NULL) {
                        /*
                         * All the cgroups in the path from there to the
                         * root must have a bfq_group for bfqd, so we don't
                         * need any more allocations.
                         */
                        break;
                }

                bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
                if (bfqg == NULL)
                        goto cleanup;

                bfq_group_init_entity(bgrp, bfqg);
                bfqg->my_entity = &bfqg->entity;

                if (leaf == NULL) {
                        leaf = bfqg;
                        prev = leaf;
                } else {
                        bfq_group_set_parent(prev, bfqg);
                        /*
                         * Build a list of allocated nodes using the bfqd
                         * field, which is still unused and will be
                         * initialized only after the node is connected.
                         */
                        prev->bfqd = bfqg;
                        prev = bfqg;
                }
        }

        return leaf;

cleanup:
        while (leaf != NULL) {
                prev = leaf;
                leaf = leaf->bfqd;
                kfree(prev);
        }

        return NULL;
}

/**
 * bfq_group_chain_link - link an allocated group chain to a cgroup
 *                        hierarchy.
 * @bfqd: the queue descriptor.
 * @cgroup: the leaf cgroup to start from.
 * @leaf: the leaf group (to be associated to @cgroup).
 *
 * Try to link a chain of groups to a cgroup hierarchy, connecting the
 * nodes bottom-up, so we can be sure that when we find a cgroup in the
 * hierarchy that already has a group associated to @bfqd, all the nodes
 * in the path to the root cgroup have one too.
 *
 * On locking: the queue lock protects the hierarchy (there is a hierarchy
 * per device) while the bfqio_cgroup lock protects the list of groups
 * belonging to the same cgroup.
 */
static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup,
                                 struct bfq_group *leaf)
{
        struct bfqio_cgroup *bgrp;
        struct bfq_group *bfqg, *next, *prev = NULL;
        unsigned long flags;

        assert_spin_locked(bfqd->queue->queue_lock);

        for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) {
                bgrp = cgroup_to_bfqio(cgroup);
                next = leaf->bfqd;

                bfqg = bfqio_lookup_group(bgrp, bfqd);
                BUG_ON(bfqg != NULL);

                spin_lock_irqsave(&bgrp->lock, flags);

                rcu_assign_pointer(leaf->bfqd, bfqd);
                hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
                hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);

                spin_unlock_irqrestore(&bgrp->lock, flags);

                prev = leaf;
                leaf = next;
        }

        BUG_ON(cgroup == NULL && leaf != NULL);
        if (cgroup != NULL && prev != NULL) {
                bgrp = cgroup_to_bfqio(cgroup);
                bfqg = bfqio_lookup_group(bgrp, bfqd);
                bfq_group_set_parent(prev, bfqg);
        }
}

/**
 * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
 * @bfqd: queue descriptor.
 * @cgroup: cgroup being searched for.
 *
 * Return a group associated to @bfqd in @cgroup, allocating one if
 * necessary. When a group is returned all the cgroups in the path
 * to the root have a group associated to @bfqd.
 *
 * If the allocation fails, return the root group: this breaks guarantees
 * but is a safe fallback. If this loss becomes a problem it can be
 * mitigated using the equivalent weight (given by the product of the
 * weights of the groups in the path from @cgroup to the root) in the
 * root scheduler.
 *
 * We allocate all the missing nodes in the path from the leaf cgroup
 * to the root and we connect the nodes only after all the allocations
 * have been successful.
 */
static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
                                              struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
        struct bfq_group *bfqg;

        bfqg = bfqio_lookup_group(bgrp, bfqd);
        if (bfqg != NULL)
                return bfqg;

        bfqg = bfq_group_chain_alloc(bfqd, cgroup);
        if (bfqg != NULL)
                bfq_group_chain_link(bfqd, cgroup, bfqg);
        else
                bfqg = bfqd->root_group;

        return bfqg;
}

/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @entity: @bfqq's entity.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one. Avoid putting the entity on the old group idle tree.
 *
 * Must be called under the queue lock; the cgroup owning @bfqg must
 * not disappear (by now this just means that we are called under
 * rcu_read_lock()).
 */
static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                          struct bfq_entity *entity, struct bfq_group *bfqg)
{
        int busy, resume;

        busy = bfq_bfqq_busy(bfqq);
        resume = !RB_EMPTY_ROOT(&bfqq->sort_list);

        BUG_ON(resume && !entity->on_st);
        BUG_ON(busy && !resume && entity->on_st &&
               bfqq != bfqd->in_service_queue);

        if (busy) {
                BUG_ON(atomic_read(&bfqq->ref) < 2);

                if (!resume)
                        bfq_del_bfqq_busy(bfqd, bfqq, 0);
                else
                        bfq_deactivate_bfqq(bfqd, bfqq, 0);
        } else if (entity->on_st)
                bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);

        /*
         * Here we use a reference to bfqg. We don't need a refcounter
         * as the cgroup reference will not be dropped, so that its
         * destroy() callback will not be invoked.
         */
        entity->parent = bfqg->my_entity;
        entity->sched_data = &bfqg->sched_data;

        if (busy && resume)
                bfq_activate_bfqq(bfqd, bfqq);

        if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
                bfq_schedule_dispatch(bfqd);
}

/**
 * __bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @cgroup: the cgroup to move to.
 *
 * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
 * has to make sure that the reference to cgroup is valid across the call.
 *
 * NOTE: an alternative approach might have been to store the current
 * cgroup in bfqq and get a reference to it, reducing the lookup
 * time here, at the price of slightly more complex code.
 */
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
                                                 struct bfq_io_cq *bic,
                                                 struct cgroup *cgroup)
{
        struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
        struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
        struct bfq_entity *entity;
        struct bfq_group *bfqg;
        struct bfqio_cgroup *bgrp;

        bgrp = cgroup_to_bfqio(cgroup);

        bfqg = bfq_find_alloc_group(bfqd, cgroup);
        if (async_bfqq != NULL) {
                entity = &async_bfqq->entity;

                if (entity->sched_data != &bfqg->sched_data) {
                        bic_set_bfqq(bic, NULL, 0);
                        bfq_log_bfqq(bfqd, async_bfqq,
                                     "bic_change_group: %p %d",
                                     async_bfqq, atomic_read(&async_bfqq->ref));
                        bfq_put_queue(async_bfqq);
                }
        }

        if (sync_bfqq != NULL) {
                entity = &sync_bfqq->entity;
                if (entity->sched_data != &bfqg->sched_data)
                        bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
        }

        return bfqg;
}

/**
 * bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bic: the bic being migrated.
 * @cgroup: the destination cgroup.
 *
 * When the task owning @bic is moved to @cgroup, @bic is immediately
 * moved into its new parent group.
 */
static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
                                  struct cgroup *cgroup)
{
        struct bfq_data *bfqd;
        unsigned long uninitialized_var(flags);

        bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
                                   &flags);
        if (bfqd != NULL) {
                __bfq_bic_change_cgroup(bfqd, bic, cgroup);
                bfq_put_bfqd_unlock(bfqd, &flags);
        }
}

/**
 * bfq_bic_update_cgroup - update the cgroup of @bic.
 * @bic: the @bic to update.
 *
 * Make sure that @bic is enqueued in the cgroup of the current task.
 * We need this in addition to moving bics during the cgroup attach
 * phase because the task owning @bic could be at its first disk
 * access, or we may have ended up in the root cgroup as the result
 * of a memory allocation failure; here we try to move to the right
 * group.
 *
 * Must be called under the queue lock. It is safe to use the returned
 * value even after the rcu_read_unlock() as the migration/destruction
 * paths act under the queue lock too. IOW it is impossible to race with
 * group migration/destruction and end up with an invalid group as:
 * a) here cgroup has not yet been destroyed, nor its destroy callback
 *    has started execution, as current holds a reference to it,
 * b) if it is destroyed after rcu_read_unlock() [after current is
 *    migrated to a different cgroup] its attach() callback will have
 *    taken care of removing all the references to the old cgroup data.
 */
static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
        struct bfq_data *bfqd = bic_to_bfqd(bic);
        struct bfq_group *bfqg;
        struct cgroup *cgroup;

        BUG_ON(bfqd == NULL);

        rcu_read_lock();
        cgroup = task_cgroup(current, bfqio_subsys_id);
        bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup);
        rcu_read_unlock();

        return bfqg;
}

/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */
static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
        struct bfq_entity *entity = st->first_idle;

        for (; entity != NULL; entity = st->first_idle)
                __bfq_deactivate_entity(entity, 0);
}

/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move.
 */
static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
                                            struct bfq_entity *entity)
{
        struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

        BUG_ON(bfqq == NULL);
        bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
        return;
}

/**
 * bfq_reparent_active_entities - move to the root group all active
 *                                entities.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree with the entities.
 *
 * Needs queue_lock to be taken and reference to be valid over the call.
 */
static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
                                                struct bfq_group *bfqg,
                                                struct bfq_service_tree *st)
{
        struct rb_root *active = &st->active;
        struct bfq_entity *entity = NULL;

        if (!RB_EMPTY_ROOT(&st->active))
                entity = bfq_entity_of(rb_first(active));

        for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
                bfq_reparent_leaf_entity(bfqd, entity);

        if (bfqg->sched_data.in_service_entity != NULL)
                bfq_reparent_leaf_entity(bfqd,
                                         bfqg->sched_data.in_service_entity);

        return;
}

/**
 * bfq_destroy_group - destroy @bfqg.
 * @bgrp: the bfqio_cgroup containing @bfqg.
 * @bfqg: the group being destroyed.
 *
 * Destroy @bfqg, making sure that it is not referenced from its parent.
 */
static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
{
        struct bfq_data *bfqd;
        struct bfq_service_tree *st;
        struct bfq_entity *entity = bfqg->my_entity;
        unsigned long uninitialized_var(flags);
        int i;

        hlist_del(&bfqg->group_node);

        /*
         * Empty all service_trees belonging to this group before
         * deactivating the group itself.
         */
        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
                st = bfqg->sched_data.service_tree + i;

                /*
                 * The idle tree may still contain bfq_queues belonging
                 * to exited tasks because they never migrated to a
                 * different cgroup from the one being destroyed now.
                 * No one else can access them, so it's safe to act
                 * without any lock.
                 */
                bfq_flush_idle_tree(st);

                /*
                 * It may happen that some queues are still active
                 * (busy) upon group destruction (if the corresponding
                 * processes have been forced to terminate). We move
                 * all the leaf entities corresponding to these queues
                 * to the root_group.
                 * Also, it may happen that the group has an entity
                 * in service, which is disconnected from the active
                 * tree: it must be moved, too.
                 * There is no need to put the sync queues, as the
                 * scheduler has taken no reference.
                 */
                bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
                if (bfqd != NULL) {
                        bfq_reparent_active_entities(bfqd, bfqg, st);
                        bfq_put_bfqd_unlock(bfqd, &flags);
                }
                BUG_ON(!RB_EMPTY_ROOT(&st->active));
                BUG_ON(!RB_EMPTY_ROOT(&st->idle));
        }
        BUG_ON(bfqg->sched_data.next_in_service != NULL);
        BUG_ON(bfqg->sched_data.in_service_entity != NULL);

        /*
         * We may race with device destruction, take extra care when
         * dereferencing bfqg->bfqd.
         */
        bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
        if (bfqd != NULL) {
                hlist_del(&bfqg->bfqd_node);
                __bfq_deactivate_entity(entity, 0);
                bfq_put_async_queues(bfqd, bfqg);
                bfq_put_bfqd_unlock(bfqd, &flags);
        }
        BUG_ON(entity->tree != NULL);

        /*
         * No need to defer the kfree() to the end of the RCU grace
         * period: we are called from the destroy() callback of our
         * cgroup, so we can be sure that no one is a) still using
         * this cgroup or b) doing lookups in it.
         */
        kfree(bfqg);
}

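/*
 * End weight raising for the async queues of every group attached to this
 * device, including the root group.
 */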
static void bfq_end_wr_async(struct bfq_data *bfqd)
{
        struct hlist_node *pos, *n;
        struct bfq_group *bfqg;

        hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node)
                bfq_end_wr_async_queues(bfqd, bfqg);
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

/**
 * bfq_disconnect_groups - disconnect @bfqd from all its groups.
 * @bfqd: the device descriptor being exited.
 *
 * When the device exits we just make sure that no lookup can return
 * the now unused group structures. They will be deallocated on cgroup
 * destruction.
 */
static void bfq_disconnect_groups(struct bfq_data *bfqd)
{
        struct hlist_node *pos, *n;
        struct bfq_group *bfqg;

        bfq_log(bfqd, "disconnect_groups beginning");
        hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) {
                hlist_del(&bfqg->bfqd_node);

                __bfq_deactivate_entity(bfqg->my_entity, 0);

                /*
                 * Don't remove from the group hash, just set an
                 * invalid key. No lookups can race with the
                 * assignment as bfqd is being destroyed; this
                 * implies also that new elements cannot be added
                 * to the list.
                 */
                rcu_assign_pointer(bfqg->bfqd, NULL);

                bfq_log(bfqd, "disconnect_groups: put async for group %p",
                        bfqg);
                bfq_put_async_queues(bfqd, bfqg);
        }
}

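/*
 * Release the root group of @bfqd: drop its async queues, unlink it from
 * the root bfqio_cgroup and free it.
 */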
static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
        struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
        struct bfq_group *bfqg = bfqd->root_group;

        bfq_put_async_queues(bfqd, bfqg);

        spin_lock_irq(&bgrp->lock);
        hlist_del_rcu(&bfqg->group_node);
        spin_unlock_irq(&bgrp->lock);

        /*
         * No need to synchronize_rcu() here: since the device is gone
         * there cannot be any read-side access to its root_group.
         */
        kfree(bfqg);
}

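/*
 * Allocate the root group for @bfqd on @node and link it to the root
 * bfqio_cgroup.
 */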
static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
        struct bfq_group *bfqg;
        struct bfqio_cgroup *bgrp;
        int i;

        bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
        if (bfqg == NULL)
                return NULL;

        bfqg->entity.parent = NULL;
        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
                bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

        bgrp = &bfqio_root_cgroup;
        spin_lock_irq(&bgrp->lock);
        rcu_assign_pointer(bfqg->bfqd, bfqd);
        hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
        spin_unlock_irq(&bgrp->lock);

        return bfqg;
}

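/*
 * Template for the cgroup file read handlers: return the current value of
 * the given per-cgroup attribute under bgrp->lock.
 */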
#define SHOW_FUNCTION(__VAR) \
static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \
                                       struct cftype *cftype) \
{ \
        struct bfqio_cgroup *bgrp; \
        u64 ret; \
\
        if (!cgroup_lock_live_group(cgroup)) \
                return -ENODEV; \
\
        bgrp = cgroup_to_bfqio(cgroup); \
        spin_lock_irq(&bgrp->lock); \
        ret = bgrp->__VAR; \
        spin_unlock_irq(&bgrp->lock); \
\
        cgroup_unlock(); \
\
        return ret; \
}

SHOW_FUNCTION(weight);
SHOW_FUNCTION(ioprio);
SHOW_FUNCTION(ioprio_class);
#undef SHOW_FUNCTION

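/*
 * Template for the cgroup file write handlers: validate the new value,
 * store it in the bfqio_cgroup and propagate it to all the groups of the
 * cgroup by setting their ioprio_changed flag.
 */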
#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \
                                        struct cftype *cftype, \
                                        u64 val) \
{ \
        struct bfqio_cgroup *bgrp; \
        struct bfq_group *bfqg; \
        struct hlist_node *n; \
\
        if (val < (__MIN) || val > (__MAX)) \
                return -EINVAL; \
\
        if (!cgroup_lock_live_group(cgroup)) \
                return -ENODEV; \
\
        bgrp = cgroup_to_bfqio(cgroup); \
\
        spin_lock_irq(&bgrp->lock); \
        bgrp->__VAR = (unsigned short)val; \
        hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \
                /* \
                 * Setting the ioprio_changed flag of the entity \
                 * to 1 with new_##__VAR == ##__VAR would re-set \
                 * the value of the weight to its ioprio mapping. \
                 * Set the flag only if necessary. \
                 */ \
                if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
                        bfqg->entity.new_##__VAR = (unsigned short)val; \
                        /* \
                         * Make sure that the above new value has been \
                         * stored in bfqg->entity.new_##__VAR before \
                         * setting the ioprio_changed flag. In fact, \
                         * this flag may be read asynchronously (in \
                         * critical sections protected by a different \
                         * lock than that held here), and finding this \
                         * flag set may cause the execution of the code \
                         * for updating parameters whose value may \
                         * depend also on bfqg->entity.new_##__VAR (in \
                         * __bfq_entity_update_weight_prio). \
                         * This barrier makes sure that the new value \
                         * of bfqg->entity.new_##__VAR is correctly \
                         * seen in that code. \
                         */ \
                        smp_wmb(); \
                        bfqg->entity.ioprio_changed = 1; \
                } \
        } \
        spin_unlock_irq(&bgrp->lock); \
\
        cgroup_unlock(); \
\
        return 0; \
}

STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
#undef STORE_FUNCTION

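/* Files exported by the bfqio controller: weight, ioprio and ioprio_class. */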
static struct cftype bfqio_files[] = {
        {
                .name = "weight",
                .read_u64 = bfqio_cgroup_weight_read,
                .write_u64 = bfqio_cgroup_weight_write,
        },
        {
                .name = "ioprio",
                .read_u64 = bfqio_cgroup_ioprio_read,
                .write_u64 = bfqio_cgroup_ioprio_write,
        },
        {
                .name = "ioprio_class",
                .read_u64 = bfqio_cgroup_ioprio_class_read,
                .write_u64 = bfqio_cgroup_ioprio_class_write,
        },
};

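/* Register the bfqio control files for @cgroup. */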
static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
        return cgroup_add_files(cgroup, subsys, bfqio_files,
                                ARRAY_SIZE(bfqio_files));
}

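/*
 * Allocate and initialize the bfqio_cgroup for a newly-created cgroup;
 * the root cgroup uses the statically-allocated bfqio_root_cgroup.
 */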
static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp;

        if (cgroup->parent != NULL) {
                bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
                if (bgrp == NULL)
                        return ERR_PTR(-ENOMEM);
        } else
                bgrp = &bfqio_root_cgroup;

        spin_lock_init(&bgrp->lock);
        INIT_HLIST_HEAD(&bgrp->group_data);
        bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
        bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;

        return &bgrp->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main bic/bfqq data structures. For now we allow a task to change
 * its cgroup only if it is the only owner of its ioc; the drawback of this
 * behavior is that a group containing a task that forked using CLONE_IO
 * will not be destroyed until the tasks sharing the ioc die.
 */
static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        int ret = 0;

        cgroup_taskset_for_each(task, cgroup, tset) {
                /* task_lock() is needed to avoid races with exit_io_context() */
                task_lock(task);
                ioc = task->io_context;
                if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
                        /*
                         * ioc == NULL means that the task is either too
                         * young or exiting: if it still has no ioc, the
                         * ioc can't be shared; if the task is exiting,
                         * the attach will fail anyway, no matter what
                         * we return here.
                         */
                        ret = -EINVAL;
                task_unlock(task);
                if (ret)
                        break;
        }

        return ret;
}

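/*
 * Attach callback: for each migrated task, move every bic whose queue is
 * managed by a bfq elevator to the destination cgroup.
 */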
static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        struct io_cq *icq;
        struct hlist_node *n;

        /*
         * IMPORTANT NOTE: The move of more than one process at a time to a
         * new group has not yet been tested.
         */
        cgroup_taskset_for_each(task, cgroup, tset) {
                ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
                if (ioc) {
                        /*
                         * Handle cgroup change here.
                         */
                        rcu_read_lock();
                        hlist_for_each_entry_rcu(icq, n, &ioc->icq_list, ioc_node)
                                if (!strncmp(
                                        icq->q->elevator->type->elevator_name,
                                        "bfq", ELV_NAME_MAX))
                                        bfq_bic_change_cgroup(icq_to_bic(icq),
                                                              cgroup);
                        rcu_read_unlock();
                        put_io_context(ioc);
                }
        }
}

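/*
 * Destroy all the bfq_groups belonging to the cgroup being removed, then
 * free its bfqio_cgroup.
 */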
static void bfqio_destroy(struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
        struct hlist_node *n, *tmp;
        struct bfq_group *bfqg;

        /*
         * Since we are destroying the cgroup, there are no more tasks
         * referencing it, and all the RCU grace periods that may have
         * referenced it are ended (as the destruction of the parent
         * cgroup is RCU-safe); bgrp->group_data will not be accessed by
         * anything else and we don't need any synchronization.
         */
        hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node)
                bfq_destroy_group(bgrp, bfqg);

        BUG_ON(!hlist_empty(&bgrp->group_data));

        kfree(bgrp);
}

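/* Definition of the bfqio cgroup subsystem and its callbacks. */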
struct cgroup_subsys bfqio_subsys = {
        .name = "bfqio",
        .create = bfqio_create,
        .can_attach = bfqio_can_attach,
        .attach = bfqio_attach,
        .destroy = bfqio_destroy,
        .populate = bfqio_populate,
        .subsys_id = bfqio_subsys_id,
};
#else
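/*
 * Fallbacks used when CONFIG_CGROUP_BFQIO is not set: every queue stays in
 * the root group of its device.
 */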
static inline void bfq_init_entity(struct bfq_entity *entity,
                                   struct bfq_group *bfqg)
{
        entity->weight = entity->new_weight;
        entity->orig_weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class;
        entity->sched_data = &bfqg->sched_data;
}

static inline struct bfq_group *
bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
        struct bfq_data *bfqd = bic_to_bfqd(bic);
        return bfqd->root_group;
}

static inline void bfq_bfqq_move(struct bfq_data *bfqd,
                                 struct bfq_queue *bfqq,
                                 struct bfq_entity *entity,
                                 struct bfq_group *bfqg)
{
}

static void bfq_end_wr_async(struct bfq_data *bfqd)
{
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
{
        bfq_put_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
        kfree(bfqd->root_group);
}

static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
        struct bfq_group *bfqg;
        int i;

        bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
        if (bfqg == NULL)
                return NULL;

        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
                bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

        return bfqg;
}
#endif
908#endif