| /* |
| * BFQ: CGROUPS support. |
| * |
| * Based on ideas and code from CFQ: |
| * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> |
| * |
| * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> |
| * Paolo Valente <paolo.valente@unimore.it> |
| * |
| * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it> |
| * |
| * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ |
| * file. |
| */ |
| |
| #ifdef CONFIG_CGROUP_BFQIO |
| static struct bfqio_cgroup bfqio_root_cgroup = { |
| .weight = BFQ_DEFAULT_GRP_WEIGHT, |
| .ioprio = BFQ_DEFAULT_GRP_IOPRIO, |
| .ioprio_class = BFQ_DEFAULT_GRP_CLASS, |
| }; |
| |
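| /** |
| * bfq_init_entity - initialize an entity and attach it to a group. |
| * @entity: the entity to initialize. |
| * @bfqg: the group the entity belongs to. |
| * |
| * Commit the new_* parameters of @entity and link it to the scheduling |
| * data of @bfqg, with bfqg->my_entity as its parent. |
| */ |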
| static inline void bfq_init_entity(struct bfq_entity *entity, |
| struct bfq_group *bfqg) |
| { |
| entity->weight = entity->new_weight; |
| entity->orig_weight = entity->new_weight; |
| entity->ioprio = entity->new_ioprio; |
| entity->ioprio_class = entity->new_ioprio_class; |
| entity->parent = bfqg->my_entity; |
| entity->sched_data = &bfqg->sched_data; |
| } |
| |
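| /* Return the bfqio_cgroup embedding the bfqio subsystem state of @cgroup. */ |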
| static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup) |
| { |
| return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id), |
| struct bfqio_cgroup, css); |
| } |
| |
| /* |
| * Search the hash table (for now just a list) of bgrp for the bfq_group |
| * associated with bfqd. Must be called under rcu_read_lock(). |
| */ |
| static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, |
| struct bfq_data *bfqd) |
| { |
| struct bfq_group *bfqg; |
| struct hlist_node *n; |
| void *key; |
| |
| hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) { |
| key = rcu_dereference(bfqg->bfqd); |
| if (key == bfqd) |
| return bfqg; |
| } |
| |
| return NULL; |
| } |
| |
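| /** |
| * bfq_group_init_entity - initialize the entity of a group. |
| * @bgrp: the bfqio_cgroup holding the user-visible parameters. |
| * @bfqg: the group whose entity is being initialized. |
| * |
| * Set the weight, ioprio and ioprio class of @bfqg's entity from the |
| * values currently stored in @bgrp. |
| */ |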
| static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, |
| struct bfq_group *bfqg) |
| { |
| struct bfq_entity *entity = &bfqg->entity; |
| |
| /* |
| * If the weight of the entity has never been set via the cgroup |
| * filesystem interface, then bgrp->weight == 0. In this case we |
| * initialize the weight from the current ioprio value. Otherwise, the |
| * group weight, if set, takes priority over the ioprio value. |
| */ |
| if (bgrp->weight == 0) { |
| entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); |
| entity->new_ioprio = bgrp->ioprio; |
| } else { |
| if (bgrp->weight < BFQ_MIN_WEIGHT || |
| bgrp->weight > BFQ_MAX_WEIGHT) { |
| printk(KERN_CRIT "bfq_group_init_entity: " |
| "bgrp->weight %d\n", bgrp->weight); |
| BUG(); |
| } |
| entity->new_weight = bgrp->weight; |
| entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); |
| } |
| entity->orig_weight = entity->weight = entity->new_weight; |
| entity->ioprio = entity->new_ioprio; |
| entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; |
| entity->my_sched_data = &bfqg->sched_data; |
| bfqg->active_entities = 0; |
| } |
| |
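| /** |
| * bfq_group_set_parent - set the parent group of a group. |
| * @bfqg: the child group. |
| * @parent: the parent group. |
| * |
| * Make @bfqg's entity a child of @parent, so that it is scheduled |
| * within @parent's sched_data. |
| */ |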
| static inline void bfq_group_set_parent(struct bfq_group *bfqg, |
| struct bfq_group *parent) |
| { |
| struct bfq_entity *entity; |
| |
| BUG_ON(parent == NULL); |
| BUG_ON(bfqg == NULL); |
| |
| entity = &bfqg->entity; |
| entity->parent = parent->my_entity; |
| entity->sched_data = &parent->sched_data; |
| } |
| |
| /** |
| * bfq_group_chain_alloc - allocate a chain of groups. |
| * @bfqd: queue descriptor. |
| * @cgroup: the leaf cgroup this chain starts from. |
| * |
| * Allocate a chain of groups starting from the one belonging to |
| * @cgroup up to the root cgroup. Stop if a cgroup on the path |
| * to the root already has a group allocated for @bfqd. |
| */ |
| static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, |
| struct cgroup *cgroup) |
| { |
| struct bfqio_cgroup *bgrp; |
| struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; |
| |
| for (; cgroup != NULL; cgroup = cgroup->parent) { |
| bgrp = cgroup_to_bfqio(cgroup); |
| |
| bfqg = bfqio_lookup_group(bgrp, bfqd); |
| if (bfqg != NULL) { |
| /* |
| * All the cgroups in the path from there to the |
| * root must have a bfq_group for bfqd, so we don't |
| * need any more allocations. |
| */ |
| break; |
| } |
| |
| bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); |
| if (bfqg == NULL) |
| goto cleanup; |
| |
| bfq_group_init_entity(bgrp, bfqg); |
| bfqg->my_entity = &bfqg->entity; |
| |
| if (leaf == NULL) { |
| leaf = bfqg; |
| prev = leaf; |
| } else { |
| bfq_group_set_parent(prev, bfqg); |
| /* |
| * Build a list of allocated nodes using the bfqd |
| * field, which is still unused here and is |
| * initialized only once the node gets connected. |
| */ |
| prev->bfqd = bfqg; |
| prev = bfqg; |
| } |
| } |
| |
| return leaf; |
| |
| cleanup: |
| while (leaf != NULL) { |
| prev = leaf; |
| leaf = leaf->bfqd; |
| kfree(prev); |
| } |
| |
| return NULL; |
| } |
| |
| /** |
| * bfq_group_chain_link - link an allocated group chain to a cgroup |
| * hierarchy. |
| * @bfqd: the queue descriptor. |
| * @cgroup: the leaf cgroup to start from. |
| * @leaf: the leaf group (to be associated to @cgroup). |
| * |
| * Try to link a chain of groups to a cgroup hierarchy, connecting the |
| * nodes bottom-up, so we can be sure that when we find a cgroup in the |
| * hierarchy that already has a group associated with @bfqd, all the |
| * nodes in the path to the root cgroup have one too. |
| * |
| * On locking: the queue lock protects the hierarchy (there is a hierarchy |
| * per device) while the bfqio_cgroup lock protects the list of groups |
| * belonging to the same cgroup. |
| */ |
| static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup, |
| struct bfq_group *leaf) |
| { |
| struct bfqio_cgroup *bgrp; |
| struct bfq_group *bfqg, *next, *prev = NULL; |
| unsigned long flags; |
| |
| assert_spin_locked(bfqd->queue->queue_lock); |
| |
| for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) { |
| bgrp = cgroup_to_bfqio(cgroup); |
| next = leaf->bfqd; |
| |
| bfqg = bfqio_lookup_group(bgrp, bfqd); |
| BUG_ON(bfqg != NULL); |
| |
| spin_lock_irqsave(&bgrp->lock, flags); |
| |
| rcu_assign_pointer(leaf->bfqd, bfqd); |
| hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); |
| hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); |
| |
| spin_unlock_irqrestore(&bgrp->lock, flags); |
| |
| prev = leaf; |
| leaf = next; |
| } |
| |
| BUG_ON(cgroup == NULL && leaf != NULL); |
| if (cgroup != NULL && prev != NULL) { |
| bgrp = cgroup_to_bfqio(cgroup); |
| bfqg = bfqio_lookup_group(bgrp, bfqd); |
| bfq_group_set_parent(prev, bfqg); |
| } |
| } |
| |
| /** |
| * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. |
| * @bfqd: queue descriptor. |
| * @cgroup: cgroup being searched for. |
| * |
| * Return a group associated to @bfqd in @cgroup, allocating one if |
| * necessary. When a group is returned all the cgroups in the path |
| * to the root have a group associated to @bfqd. |
| * |
| * If the allocation fails, return the root group: this breaks guarantees |
| * but is a safe fallback. If this loss becomes a problem it can be |
| * mitigated using the equivalent weight (given by the product of the |
| * weights of the groups in the path from @cgroup to the root) in the |
| * root scheduler. |
| * |
| * We allocate all the missing nodes in the path from the leaf cgroup |
| * to the root and we connect the nodes only after all the allocations |
| * have been successful. |
| */ |
| static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, |
| struct cgroup *cgroup) |
| { |
| struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); |
| struct bfq_group *bfqg; |
| |
| bfqg = bfqio_lookup_group(bgrp, bfqd); |
| if (bfqg != NULL) |
| return bfqg; |
| |
| bfqg = bfq_group_chain_alloc(bfqd, cgroup); |
| if (bfqg != NULL) |
| bfq_group_chain_link(bfqd, cgroup, bfqg); |
| else |
| bfqg = bfqd->root_group; |
| |
| return bfqg; |
| } |
| |
| /** |
| * bfq_bfqq_move - migrate @bfqq to @bfqg. |
| * @bfqd: queue descriptor. |
| * @bfqq: the queue to move. |
| * @entity: @bfqq's entity. |
| * @bfqg: the group to move to. |
| * |
| * Move @bfqq to @bfqg, deactivating it from its old group and reactivating |
| * it on the new one. Avoid putting the entity on the old group idle tree. |
| * |
| * Must be called under the queue lock; the cgroup owning @bfqg must |
| * not disappear (for now this just means that we are called under |
| * rcu_read_lock()). |
| */ |
| static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, |
| struct bfq_entity *entity, struct bfq_group *bfqg) |
| { |
| int busy, resume; |
| |
| busy = bfq_bfqq_busy(bfqq); |
| resume = !RB_EMPTY_ROOT(&bfqq->sort_list); |
| |
| BUG_ON(resume && !entity->on_st); |
| BUG_ON(busy && !resume && entity->on_st && |
| bfqq != bfqd->in_service_queue); |
| |
| if (busy) { |
| BUG_ON(atomic_read(&bfqq->ref) < 2); |
| |
| if (!resume) |
| bfq_del_bfqq_busy(bfqd, bfqq, 0); |
| else |
| bfq_deactivate_bfqq(bfqd, bfqq, 0); |
| } else if (entity->on_st) |
| bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); |
| |
| /* |
| * Here we use bfqg without taking any extra reference: it is safe |
| * because the reference to the owning cgroup is not dropped during |
| * the move, so its destroy() callback cannot be invoked. |
| */ |
| entity->parent = bfqg->my_entity; |
| entity->sched_data = &bfqg->sched_data; |
| |
| if (busy && resume) |
| bfq_activate_bfqq(bfqd, bfqq); |
| |
| if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) |
| bfq_schedule_dispatch(bfqd); |
| } |
| |
| /** |
| * __bfq_bic_change_cgroup - move @bic to @cgroup. |
| * @bfqd: the queue descriptor. |
| * @bic: the bic to move. |
| * @cgroup: the cgroup to move to. |
| * |
| * Move @bic to @cgroup, assuming that bfqd->queue is locked; the caller |
| * has to make sure that the reference to @cgroup is valid across the call. |
| * |
| * NOTE: an alternative approach might have been to store the current |
| * cgroup in bfqq and get a reference to it, reducing the lookup time |
| * here, at the price of slightly more complex code. |
| */ |
| static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, |
| struct bfq_io_cq *bic, |
| struct cgroup *cgroup) |
| { |
| struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); |
| struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); |
| struct bfq_entity *entity; |
| struct bfq_group *bfqg; |
| struct bfqio_cgroup *bgrp; |
| |
| bgrp = cgroup_to_bfqio(cgroup); |
| |
| bfqg = bfq_find_alloc_group(bfqd, cgroup); |
| if (async_bfqq != NULL) { |
| entity = &async_bfqq->entity; |
| |
| if (entity->sched_data != &bfqg->sched_data) { |
| bic_set_bfqq(bic, NULL, 0); |
| bfq_log_bfqq(bfqd, async_bfqq, |
| "bic_change_group: %p %d", |
| async_bfqq, atomic_read(&async_bfqq->ref)); |
| bfq_put_queue(async_bfqq); |
| } |
| } |
| |
| if (sync_bfqq != NULL) { |
| entity = &sync_bfqq->entity; |
| if (entity->sched_data != &bfqg->sched_data) |
| bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); |
| } |
| |
| return bfqg; |
| } |
| |
| /** |
| * bfq_bic_change_cgroup - move @bic to @cgroup. |
| * @bic: the bic being migrated. |
| * @cgroup: the destination cgroup. |
| * |
| * When the task owning @bic is moved to @cgroup, @bic is immediately |
| * moved into its new parent group. |
| */ |
| static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, |
| struct cgroup *cgroup) |
| { |
| struct bfq_data *bfqd; |
| unsigned long uninitialized_var(flags); |
| |
| bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), |
| &flags); |
| if (bfqd != NULL) { |
| __bfq_bic_change_cgroup(bfqd, bic, cgroup); |
| bfq_put_bfqd_unlock(bfqd, &flags); |
| } |
| } |
| |
| /** |
| * bfq_bic_update_cgroup - update the cgroup of @bic. |
| * @bic: the @bic to update. |
| * |
| * Make sure that @bic is enqueued in the cgroup of the current task. |
| * We need this in addition to moving bics during the cgroup attach |
| * phase because the task owning @bic could be at its first disk |
| * access, or we may have ended up in the root cgroup as the result of |
| * a memory allocation failure; here we try to move to the right |
| * group. |
| * |
| * Must be called under the queue lock. It is safe to use the returned |
| * value even after the rcu_read_unlock(), as the migration/destruction |
| * paths act under the queue lock too. IOW it is impossible to race with |
| * group migration/destruction and end up with an invalid group because: |
| * a) here cgroup has not yet been destroyed, nor has its destroy callback |
| * started execution, as current holds a reference to it, |
| * b) if it is destroyed after rcu_read_unlock() [after current is |
| * migrated to a different cgroup] its attach() callback will have |
| * taken care of removing all the references to the old cgroup data. |
| */ |
| static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) |
| { |
| struct bfq_data *bfqd = bic_to_bfqd(bic); |
| struct bfq_group *bfqg; |
| struct cgroup *cgroup; |
| |
| BUG_ON(bfqd == NULL); |
| |
| rcu_read_lock(); |
| cgroup = task_cgroup(current, bfqio_subsys_id); |
| bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup); |
| rcu_read_unlock(); |
| |
| return bfqg; |
| } |
| |
| /** |
| * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. |
| * @st: the service tree being flushed. |
| */ |
| static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) |
| { |
| struct bfq_entity *entity = st->first_idle; |
| |
| for (; entity != NULL; entity = st->first_idle) |
| __bfq_deactivate_entity(entity, 0); |
| } |
| |
| /** |
| * bfq_reparent_leaf_entity - move leaf entity to the root_group. |
| * @bfqd: the device data structure with the root group. |
| * @entity: the entity to move. |
| */ |
| static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, |
| struct bfq_entity *entity) |
| { |
| struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); |
| |
| BUG_ON(bfqq == NULL); |
| bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); |
| return; |
| } |
| |
| /** |
| * bfq_reparent_active_entities - move to the root group all active |
| * entities. |
| * @bfqd: the device data structure with the root group. |
| * @bfqg: the group to move from. |
| * @st: the service tree with the entities. |
| * |
| * Needs the queue_lock to be held and the reference to be valid across |
| * the call. |
| */ |
| static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, |
| struct bfq_group *bfqg, |
| struct bfq_service_tree *st) |
| { |
| struct rb_root *active = &st->active; |
| struct bfq_entity *entity = NULL; |
| |
| if (!RB_EMPTY_ROOT(&st->active)) |
| entity = bfq_entity_of(rb_first(active)); |
| |
| for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) |
| bfq_reparent_leaf_entity(bfqd, entity); |
| |
| if (bfqg->sched_data.in_service_entity != NULL) |
| bfq_reparent_leaf_entity(bfqd, |
| bfqg->sched_data.in_service_entity); |
| |
| return; |
| } |
| |
| /** |
| * bfq_destroy_group - destroy @bfqg. |
| * @bgrp: the bfqio_cgroup containing @bfqg. |
| * @bfqg: the group being destroyed. |
| * |
| * Destroy @bfqg, making sure that it is not referenced from its parent. |
| */ |
| static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) |
| { |
| struct bfq_data *bfqd; |
| struct bfq_service_tree *st; |
| struct bfq_entity *entity = bfqg->my_entity; |
| unsigned long uninitialized_var(flags); |
| int i; |
| |
| hlist_del(&bfqg->group_node); |
| |
| /* |
| * Empty all service_trees belonging to this group before |
| * deactivating the group itself. |
| */ |
| for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { |
| st = bfqg->sched_data.service_tree + i; |
| |
| /* |
| * The idle tree may still contain bfq_queues belonging |
| * to exited tasks, because they never migrated to a |
| * different cgroup from the one being destroyed now. |
| * No one else can access them, so it is safe to act |
| * without any lock. |
| */ |
| bfq_flush_idle_tree(st); |
| |
| /* |
| * It may happen that some queues are still active |
| * (busy) upon group destruction (if the corresponding |
| * processes have been forced to terminate). We move |
| * all the leaf entities corresponding to these queues |
| * to the root_group. |
| * Also, it may happen that the group has an entity |
| * in service, which is disconnected from the active |
| * tree: it must be moved, too. |
| * There is no need to put the sync queues, as the |
| * scheduler has taken no reference. |
| */ |
| bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); |
| if (bfqd != NULL) { |
| bfq_reparent_active_entities(bfqd, bfqg, st); |
| bfq_put_bfqd_unlock(bfqd, &flags); |
| } |
| BUG_ON(!RB_EMPTY_ROOT(&st->active)); |
| BUG_ON(!RB_EMPTY_ROOT(&st->idle)); |
| } |
| BUG_ON(bfqg->sched_data.next_in_service != NULL); |
| BUG_ON(bfqg->sched_data.in_service_entity != NULL); |
| |
| /* |
| * We may race with device destruction, take extra care when |
| * dereferencing bfqg->bfqd. |
| */ |
| bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); |
| if (bfqd != NULL) { |
| hlist_del(&bfqg->bfqd_node); |
| __bfq_deactivate_entity(entity, 0); |
| bfq_put_async_queues(bfqd, bfqg); |
| bfq_put_bfqd_unlock(bfqd, &flags); |
| } |
| BUG_ON(entity->tree != NULL); |
| |
| /* |
| * No need to defer the kfree() to the end of the RCU grace |
| * period: we are called from the destroy() callback of our |
| * cgroup, so we can be sure that no one is a) still using |
| * this cgroup or b) doing lookups in it. |
| */ |
| kfree(bfqg); |
| } |
| |
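| /* |
| * End weight raising for the async queues of all the groups of @bfqd, |
| * including the root group. |
| */ |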
| static void bfq_end_wr_async(struct bfq_data *bfqd) |
| { |
| struct hlist_node *pos, *n; |
| struct bfq_group *bfqg; |
| |
| hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) |
| bfq_end_wr_async_queues(bfqd, bfqg); |
| bfq_end_wr_async_queues(bfqd, bfqd->root_group); |
| } |
| |
| /** |
| * bfq_disconnect_groups - disconnect @bfqd from all its groups. |
| * @bfqd: the device descriptor being exited. |
| * |
| * When the device exits we just make sure that no lookup can return |
| * the now unused group structures. They will be deallocated on cgroup |
| * destruction. |
| */ |
| static void bfq_disconnect_groups(struct bfq_data *bfqd) |
| { |
| struct hlist_node *pos, *n; |
| struct bfq_group *bfqg; |
| |
| bfq_log(bfqd, "disconnect_groups beginning"); |
| hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) { |
| hlist_del(&bfqg->bfqd_node); |
| |
| __bfq_deactivate_entity(bfqg->my_entity, 0); |
| |
| /* |
| * Don't remove from the group hash, just set an |
| * invalid key. No lookups can race with the |
| * assignment as bfqd is being destroyed; this |
| * implies also that new elements cannot be added |
| * to the list. |
| */ |
| rcu_assign_pointer(bfqg->bfqd, NULL); |
| |
| bfq_log(bfqd, "disconnect_groups: put async for group %p", |
| bfqg); |
| bfq_put_async_queues(bfqd, bfqg); |
| } |
| } |
| |
| static inline void bfq_free_root_group(struct bfq_data *bfqd) |
| { |
| struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; |
| struct bfq_group *bfqg = bfqd->root_group; |
| |
| bfq_put_async_queues(bfqd, bfqg); |
| |
| spin_lock_irq(&bgrp->lock); |
| hlist_del_rcu(&bfqg->group_node); |
| spin_unlock_irq(&bgrp->lock); |
| |
| /* |
| * No need to synchronize_rcu() here: since the device is gone |
| * there cannot be any read-side access to its root_group. |
| */ |
| kfree(bfqg); |
| } |
| |
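| /* |
| * Allocate and initialize the root group of @bfqd on memory node @node, |
| * and add it to the group list of the root bfqio_cgroup so that lookups |
| * can find it. |
| */ |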
| static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) |
| { |
| struct bfq_group *bfqg; |
| struct bfqio_cgroup *bgrp; |
| int i; |
| |
| bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); |
| if (bfqg == NULL) |
| return NULL; |
| |
| bfqg->entity.parent = NULL; |
| for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) |
| bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; |
| |
| bgrp = &bfqio_root_cgroup; |
| spin_lock_irq(&bgrp->lock); |
| rcu_assign_pointer(bfqg->bfqd, bfqd); |
| hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); |
| spin_unlock_irq(&bgrp->lock); |
| |
| return bfqg; |
| } |
| |
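| /* |
| * Template for the read handlers of the cgroup files below: return the |
| * requested bfqio_cgroup field under bgrp->lock, or -ENODEV if the |
| * cgroup is going away. |
| */ |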
| #define SHOW_FUNCTION(__VAR) \ |
| static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \ |
| struct cftype *cftype) \ |
| { \ |
| struct bfqio_cgroup *bgrp; \ |
| u64 ret; \ |
| \ |
| if (!cgroup_lock_live_group(cgroup)) \ |
| return -ENODEV; \ |
| \ |
| bgrp = cgroup_to_bfqio(cgroup); \ |
| spin_lock_irq(&bgrp->lock); \ |
| ret = bgrp->__VAR; \ |
| spin_unlock_irq(&bgrp->lock); \ |
| \ |
| cgroup_unlock(); \ |
| \ |
| return ret; \ |
| } |
| |
| SHOW_FUNCTION(weight); |
| SHOW_FUNCTION(ioprio); |
| SHOW_FUNCTION(ioprio_class); |
| #undef SHOW_FUNCTION |
| |
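| /* |
| * Template for the write handlers of the cgroup files below: range-check |
| * the new value, store it in the bfqio_cgroup and propagate it to the |
| * entity of every bfq_group of the cgroup via the ioprio_changed flag. |
| */ |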
| #define STORE_FUNCTION(__VAR, __MIN, __MAX) \ |
| static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \ |
| struct cftype *cftype, \ |
| u64 val) \ |
| { \ |
| struct bfqio_cgroup *bgrp; \ |
| struct bfq_group *bfqg; \ |
| struct hlist_node *n; \ |
| \ |
| if (val < (__MIN) || val > (__MAX)) \ |
| return -EINVAL; \ |
| \ |
| if (!cgroup_lock_live_group(cgroup)) \ |
| return -ENODEV; \ |
| \ |
| bgrp = cgroup_to_bfqio(cgroup); \ |
| \ |
| spin_lock_irq(&bgrp->lock); \ |
| bgrp->__VAR = (unsigned short)val; \ |
| hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \ |
| /* \ |
| * Setting the ioprio_changed flag of the entity \ |
| * to 1 with new_##__VAR == ##__VAR would re-set \ |
| * the value of the weight to its ioprio mapping. \ |
| * Set the flag only if necessary. \ |
| */ \ |
| if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ |
| bfqg->entity.new_##__VAR = (unsigned short)val; \ |
| /* \ |
| * Make sure that the above new value has been \ |
| * stored in bfqg->entity.new_##__VAR before \ |
| * setting the ioprio_changed flag. In fact, \ |
| * this flag may be read asynchronously (in \ |
| * critical sections protected by a different \ |
| * lock than that held here), and finding this \ |
| * flag set may cause the execution of the code \ |
| * for updating parameters whose value may \ |
| * depend also on bfqg->entity.new_##__VAR (in \ |
| * __bfq_entity_update_weight_prio). \ |
| * This barrier makes sure that the new value \ |
| * of bfqg->entity.new_##__VAR is correctly \ |
| * seen in that code. \ |
| */ \ |
| smp_wmb(); \ |
| bfqg->entity.ioprio_changed = 1; \ |
| } \ |
| } \ |
| spin_unlock_irq(&bgrp->lock); \ |
| \ |
| cgroup_unlock(); \ |
| \ |
| return 0; \ |
| } |
| |
| STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); |
| STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); |
| STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); |
| #undef STORE_FUNCTION |
| |
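| /* |
| * Control files added to each cgroup directory; they are exposed to |
| * userspace as bfqio.weight, bfqio.ioprio and bfqio.ioprio_class. |
| */ |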
| static struct cftype bfqio_files[] = { |
| { |
| .name = "weight", |
| .read_u64 = bfqio_cgroup_weight_read, |
| .write_u64 = bfqio_cgroup_weight_write, |
| }, |
| { |
| .name = "ioprio", |
| .read_u64 = bfqio_cgroup_ioprio_read, |
| .write_u64 = bfqio_cgroup_ioprio_write, |
| }, |
| { |
| .name = "ioprio_class", |
| .read_u64 = bfqio_cgroup_ioprio_class_read, |
| .write_u64 = bfqio_cgroup_ioprio_class_write, |
| }, |
| }; |
| |
| static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) |
| { |
| return cgroup_add_files(cgroup, subsys, bfqio_files, |
| ARRAY_SIZE(bfqio_files)); |
| } |
| |
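| /* |
| * Allocate and initialize the bfqio_cgroup of a newly created cgroup; |
| * the root cgroup reuses the statically allocated bfqio_root_cgroup. |
| */ |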
| static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup) |
| { |
| struct bfqio_cgroup *bgrp; |
| |
| if (cgroup->parent != NULL) { |
| bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); |
| if (bgrp == NULL) |
| return ERR_PTR(-ENOMEM); |
| } else |
| bgrp = &bfqio_root_cgroup; |
| |
| spin_lock_init(&bgrp->lock); |
| INIT_HLIST_HEAD(&bgrp->group_data); |
| bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; |
| bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; |
| |
| return &bgrp->css; |
| } |
| |
| /* |
| * We cannot support shared io contexts, as we have no means to support |
| * two tasks with the same ioc in two different groups without major rework |
| * of the main bic/bfqq data structures. For now we allow a task to change |
| * its cgroup only if it's the only owner of its ioc; the drawback of this |
| * behavior is that a group containing a task that forked using CLONE_IO |
| * will not be destroyed until the tasks sharing the ioc die. |
| */ |
| static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) |
| { |
| struct task_struct *task; |
| struct io_context *ioc; |
| int ret = 0; |
| |
| cgroup_taskset_for_each(task, cgroup, tset) { |
| /* task_lock() is needed to avoid races with exit_io_context() */ |
| task_lock(task); |
| ioc = task->io_context; |
| if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) |
| /* |
| * ioc == NULL means that the task is either too |
| * young or exiting: if it still has no ioc, the |
| * ioc cannot be shared; if the task is exiting, |
| * the attach will fail anyway, no matter what we |
| * return here. |
| */ |
| ret = -EINVAL; |
| task_unlock(task); |
| if (ret) |
| break; |
| } |
| |
| return ret; |
| } |
| |
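| /* |
| * Attach callback: for each task being moved, walk the icqs of its |
| * io_context and move to the new cgroup every bic that belongs to a |
| * bfq-managed queue. |
| */ |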
| static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset) |
| { |
| struct task_struct *task; |
| struct io_context *ioc; |
| struct io_cq *icq; |
| struct hlist_node *n; |
| |
| /* |
| * IMPORTANT NOTE: The move of more than one process at a time to a |
| * new group has not yet been tested. |
| */ |
| cgroup_taskset_for_each(task, cgroup, tset) { |
| ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); |
| if (ioc) { |
| /* |
| * Handle cgroup change here. |
| */ |
| rcu_read_lock(); |
| hlist_for_each_entry_rcu(icq, n, &ioc->icq_list, ioc_node) |
| if (!strncmp( |
| icq->q->elevator->type->elevator_name, |
| "bfq", ELV_NAME_MAX)) |
| bfq_bic_change_cgroup(icq_to_bic(icq), |
| cgroup); |
| rcu_read_unlock(); |
| put_io_context(ioc); |
| } |
| } |
| } |
| |
| static void bfqio_destroy(struct cgroup *cgroup) |
| { |
| struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup); |
| struct hlist_node *n, *tmp; |
| struct bfq_group *bfqg; |
| |
| /* |
| * Since we are destroying the cgroup, there are no more tasks |
| * referencing it, and all the RCU grace periods that may have |
| * referenced it have ended (as the destruction of the parent |
| * cgroup is RCU-safe); bgrp->group_data will not be accessed by |
| * anything else and we don't need any synchronization. |
| */ |
| hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node) |
| bfq_destroy_group(bgrp, bfqg); |
| |
| BUG_ON(!hlist_empty(&bgrp->group_data)); |
| |
| kfree(bgrp); |
| } |
| |
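| /* Callbacks of the bfqio cgroup subsystem. */ |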
| struct cgroup_subsys bfqio_subsys = { |
| .name = "bfqio", |
| .create = bfqio_create, |
| .can_attach = bfqio_can_attach, |
| .attach = bfqio_attach, |
| .destroy = bfqio_destroy, |
| .populate = bfqio_populate, |
| .subsys_id = bfqio_subsys_id, |
| }; |
| #else |
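| /* |
| * Fallback used when cgroup support is not compiled in: all the queues |
| * of a device are scheduled in its single root group. |
| */ |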
| static inline void bfq_init_entity(struct bfq_entity *entity, |
| struct bfq_group *bfqg) |
| { |
| entity->weight = entity->new_weight; |
| entity->orig_weight = entity->new_weight; |
| entity->ioprio = entity->new_ioprio; |
| entity->ioprio_class = entity->new_ioprio_class; |
| entity->sched_data = &bfqg->sched_data; |
| } |
| |
| static inline struct bfq_group * |
| bfq_bic_update_cgroup(struct bfq_io_cq *bic) |
| { |
| struct bfq_data *bfqd = bic_to_bfqd(bic); |
| return bfqd->root_group; |
| } |
| |
| static inline void bfq_bfqq_move(struct bfq_data *bfqd, |
| struct bfq_queue *bfqq, |
| struct bfq_entity *entity, |
| struct bfq_group *bfqg) |
| { |
| } |
| |
| static void bfq_end_wr_async(struct bfq_data *bfqd) |
| { |
| bfq_end_wr_async_queues(bfqd, bfqd->root_group); |
| } |
| |
| static inline void bfq_disconnect_groups(struct bfq_data *bfqd) |
| { |
| bfq_put_async_queues(bfqd, bfqd->root_group); |
| } |
| |
| static inline void bfq_free_root_group(struct bfq_data *bfqd) |
| { |
| kfree(bfqd->root_group); |
| } |
| |
| static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) |
| { |
| struct bfq_group *bfqg; |
| int i; |
| |
| bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); |
| if (bfqg == NULL) |
| return NULL; |
| |
| for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) |
| bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; |
| |
| return bfqg; |
| } |
| #endif |