/*
 * BFQ: CGROUPS support.
 *
 * Based on ideas and code from CFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
 *
 * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
 * file.
 */

#ifdef CONFIG_CGROUP_BFQIO
static struct bfqio_cgroup bfqio_root_cgroup = {
        .weight = BFQ_DEFAULT_GRP_WEIGHT,
        .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
        .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
};

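/*
 * Initialize @entity from the parameters cached in its new_* fields and
 * link it to the scheduler data of @bfqg.
 */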
static inline void bfq_init_entity(struct bfq_entity *entity,
                                   struct bfq_group *bfqg)
{
        entity->weight = entity->new_weight;
        entity->orig_weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class;
        entity->parent = bfqg->my_entity;
        entity->sched_data = &bfqg->sched_data;
}

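/*
 * Return the bfqio_cgroup embedding the bfqio subsystem state of
 * @cgroup.
 */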
static struct bfqio_cgroup *cgroup_to_bfqio(struct cgroup *cgroup)
{
        return container_of(cgroup_subsys_state(cgroup, bfqio_subsys_id),
                            struct bfqio_cgroup, css);
}

/*
 * Search for the bfq_group associated with bfqd in the hash table (for
 * now only a list) of bgrp. Must be called under rcu_read_lock().
 */
static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
                                            struct bfq_data *bfqd)
{
        struct bfq_group *bfqg;
        struct hlist_node *n;
        void *key;

        hlist_for_each_entry_rcu(bfqg, n, &bgrp->group_data, group_node) {
                key = rcu_dereference(bfqg->bfqd);
                if (key == bfqd)
                        return bfqg;
        }

        return NULL;
}

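/*
 * Initialize the entity of @bfqg from the weight, ioprio and ioprio
 * class stored in @bgrp; see the comment below for how a still unset
 * (zero) group weight is handled.
 */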
static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
                                         struct bfq_group *bfqg)
{
        struct bfq_entity *entity = &bfqg->entity;

        /*
         * If the weight of the entity has never been set via the sysfs
         * interface, then bgrp->weight == 0. In this case we initialize
         * the weight from the current ioprio value. Otherwise, the group
         * weight, if set, has priority over the ioprio value.
         */
        if (bgrp->weight == 0) {
                entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
                entity->new_ioprio = bgrp->ioprio;
        } else {
                if (bgrp->weight < BFQ_MIN_WEIGHT ||
                    bgrp->weight > BFQ_MAX_WEIGHT) {
                        printk(KERN_CRIT "bfq_group_init_entity: "
                                         "bgrp->weight %d\n", bgrp->weight);
                        BUG();
                }
                entity->new_weight = bgrp->weight;
                entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
        }
        entity->orig_weight = entity->weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
        entity->my_sched_data = &bfqg->sched_data;
        bfqg->active_entities = 0;
}

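/* Link @bfqg to @parent in the scheduling hierarchy of its device. */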
static inline void bfq_group_set_parent(struct bfq_group *bfqg,
                                        struct bfq_group *parent)
{
        struct bfq_entity *entity;

        BUG_ON(parent == NULL);
        BUG_ON(bfqg == NULL);

        entity = &bfqg->entity;
        entity->parent = parent->my_entity;
        entity->sched_data = &parent->sched_data;
}

/**
 * bfq_group_chain_alloc - allocate a chain of groups.
 * @bfqd: queue descriptor.
 * @cgroup: the leaf cgroup this chain starts from.
 *
 * Allocate a chain of groups starting from the one belonging to
 * @cgroup up to the root cgroup. Stop as soon as a cgroup on the
 * path to the root already has a group allocated for @bfqd.
 */
static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
                                               struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp;
        struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;

        for (; cgroup != NULL; cgroup = cgroup->parent) {
                bgrp = cgroup_to_bfqio(cgroup);

                bfqg = bfqio_lookup_group(bgrp, bfqd);
                if (bfqg != NULL) {
                        /*
                         * All the cgroups in the path from there to the
                         * root must have a bfq_group for bfqd, so we don't
                         * need any more allocations.
                         */
                        break;
                }

                bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
                if (bfqg == NULL)
                        goto cleanup;

                bfq_group_init_entity(bgrp, bfqg);
                bfqg->my_entity = &bfqg->entity;

                if (leaf == NULL) {
                        leaf = bfqg;
                        prev = leaf;
                } else {
                        bfq_group_set_parent(prev, bfqg);
                        /*
                         * Build a list of the allocated nodes using the
                         * bfqd field, which is still unused and will be
                         * initialized only after the node is connected.
                         */
                        prev->bfqd = bfqg;
                        prev = bfqg;
                }
        }

        return leaf;

cleanup:
        while (leaf != NULL) {
                prev = leaf;
                leaf = leaf->bfqd;
                kfree(prev);
        }

        return NULL;
}

/**
 * bfq_group_chain_link - link an allocated group chain to a cgroup
 *                        hierarchy.
 * @bfqd: the queue descriptor.
 * @cgroup: the leaf cgroup to start from.
 * @leaf: the leaf group (to be associated to @cgroup).
 *
 * Try to link a chain of groups to a cgroup hierarchy, connecting the
 * nodes bottom-up, so we can be sure that when we find a cgroup in the
 * hierarchy that already has a group associated with @bfqd, all the
 * nodes in the path to the root cgroup have one too.
 *
 * On locking: the queue lock protects the hierarchy (there is a hierarchy
 * per device) while the bfqio_cgroup lock protects the list of groups
 * belonging to the same cgroup.
 */
static void bfq_group_chain_link(struct bfq_data *bfqd, struct cgroup *cgroup,
                                 struct bfq_group *leaf)
{
        struct bfqio_cgroup *bgrp;
        struct bfq_group *bfqg, *next, *prev = NULL;
        unsigned long flags;

        assert_spin_locked(bfqd->queue->queue_lock);

        for (; cgroup != NULL && leaf != NULL; cgroup = cgroup->parent) {
                bgrp = cgroup_to_bfqio(cgroup);
                next = leaf->bfqd;

                bfqg = bfqio_lookup_group(bgrp, bfqd);
                BUG_ON(bfqg != NULL);

                spin_lock_irqsave(&bgrp->lock, flags);

                rcu_assign_pointer(leaf->bfqd, bfqd);
                hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
                hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);

                spin_unlock_irqrestore(&bgrp->lock, flags);

                prev = leaf;
                leaf = next;
        }

        BUG_ON(cgroup == NULL && leaf != NULL);
        if (cgroup != NULL && prev != NULL) {
                bgrp = cgroup_to_bfqio(cgroup);
                bfqg = bfqio_lookup_group(bgrp, bfqd);
                bfq_group_set_parent(prev, bfqg);
        }
}

/**
 * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
 * @bfqd: queue descriptor.
 * @cgroup: cgroup being searched for.
 *
 * Return a group associated to @bfqd in @cgroup, allocating one if
 * necessary. When a group is returned all the cgroups in the path
 * to the root have a group associated to @bfqd.
 *
 * If the allocation fails, return the root group: this breaks guarantees
 * but is a safe fallback. If this loss becomes a problem it can be
 * mitigated using the equivalent weight (given by the product of the
 * weights of the groups in the path from @cgroup to the root) in the
 * root scheduler.
 *
 * We allocate all the missing nodes in the path from the leaf cgroup
 * to the root, and we connect the nodes only after all the allocations
 * have been successful.
 */
static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
                                              struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
        struct bfq_group *bfqg;

        bfqg = bfqio_lookup_group(bgrp, bfqd);
        if (bfqg != NULL)
                return bfqg;

        bfqg = bfq_group_chain_alloc(bfqd, cgroup);
        if (bfqg != NULL)
                bfq_group_chain_link(bfqd, cgroup, bfqg);
        else
                bfqg = bfqd->root_group;

        return bfqg;
}

/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @entity: @bfqq's entity.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one. Avoid putting the entity on the old group's idle tree.
 *
 * Must be called under the queue lock; the cgroup owning @bfqg must
 * not disappear (for now this just means that we are called under
 * rcu_read_lock()).
 */
static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
                          struct bfq_entity *entity, struct bfq_group *bfqg)
{
        int busy, resume;

        busy = bfq_bfqq_busy(bfqq);
        resume = !RB_EMPTY_ROOT(&bfqq->sort_list);

        BUG_ON(resume && !entity->on_st);
        BUG_ON(busy && !resume && entity->on_st &&
               bfqq != bfqd->in_service_queue);

        if (busy) {
                BUG_ON(atomic_read(&bfqq->ref) < 2);

                if (!resume)
                        bfq_del_bfqq_busy(bfqd, bfqq, 0);
                else
                        bfq_deactivate_bfqq(bfqd, bfqq, 0);
        } else if (entity->on_st)
                bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);

        /*
         * Here we use a reference to bfqg. We don't need a refcounter
         * as the cgroup reference will not be dropped, so its destroy()
         * callback will not be invoked.
         */
        entity->parent = bfqg->my_entity;
        entity->sched_data = &bfqg->sched_data;

        if (busy && resume)
                bfq_activate_bfqq(bfqd, bfqq);

        if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
                bfq_schedule_dispatch(bfqd);
}

/**
 * __bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @cgroup: the cgroup to move to.
 *
 * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
 * has to make sure that the reference to cgroup is valid across the call.
 *
 * NOTE: an alternative approach might have been to store the current
 * cgroup in bfqq and to get a reference to it, reducing the lookup
 * time here, at the price of slightly more complex code.
 */
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
                                                 struct bfq_io_cq *bic,
                                                 struct cgroup *cgroup)
{
        struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
        struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
        struct bfq_entity *entity;
        struct bfq_group *bfqg;
        struct bfqio_cgroup *bgrp;

        bgrp = cgroup_to_bfqio(cgroup);

        bfqg = bfq_find_alloc_group(bfqd, cgroup);
        if (async_bfqq != NULL) {
                entity = &async_bfqq->entity;

                if (entity->sched_data != &bfqg->sched_data) {
                        bic_set_bfqq(bic, NULL, 0);
                        bfq_log_bfqq(bfqd, async_bfqq,
                                     "bic_change_group: %p %d",
                                     async_bfqq,
                                     atomic_read(&async_bfqq->ref));
                        bfq_put_queue(async_bfqq);
                }
        }

        if (sync_bfqq != NULL) {
                entity = &sync_bfqq->entity;
                if (entity->sched_data != &bfqg->sched_data)
                        bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
        }

        return bfqg;
}

/**
 * bfq_bic_change_cgroup - move @bic to @cgroup.
 * @bic: the bic being migrated.
 * @cgroup: the destination cgroup.
 *
 * When the task owning @bic is moved to @cgroup, @bic is immediately
 * moved into its new parent group.
 */
static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
                                  struct cgroup *cgroup)
{
        struct bfq_data *bfqd;
        unsigned long uninitialized_var(flags);

        bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
                                   &flags);
        if (bfqd != NULL) {
                __bfq_bic_change_cgroup(bfqd, bic, cgroup);
                bfq_put_bfqd_unlock(bfqd, &flags);
        }
}

/**
 * bfq_bic_update_cgroup - update the cgroup of @bic.
 * @bic: the @bic to update.
 *
 * Make sure that @bic is enqueued in the cgroup of the current task.
 * We need this in addition to moving bics during the cgroup attach
 * phase because the task owning @bic could be at its first disk
 * access, or we may have ended up in the root cgroup as the result of
 * a memory allocation failure, in which case we try here to move to
 * the right group.
 *
 * Must be called under the queue lock. It is safe to use the returned
 * value even after the rcu_read_unlock() as the migration/destruction
 * paths act under the queue lock too. IOW it is impossible to race with
 * group migration/destruction and end up with an invalid group as:
 *   a) here cgroup has not yet been destroyed, nor has its destroy
 *      callback started execution, as current holds a reference to it,
 *   b) if it is destroyed after rcu_read_unlock() [after current is
 *      migrated to a different cgroup] its attach() callback will have
 *      taken care of removing all the references to the old cgroup data.
 */
static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
        struct bfq_data *bfqd = bic_to_bfqd(bic);
        struct bfq_group *bfqg;
        struct cgroup *cgroup;

        BUG_ON(bfqd == NULL);

        rcu_read_lock();
        cgroup = task_cgroup(current, bfqio_subsys_id);
        bfqg = __bfq_bic_change_cgroup(bfqd, bic, cgroup);
        rcu_read_unlock();

        return bfqg;
}

/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */
static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
        struct bfq_entity *entity = st->first_idle;

        for (; entity != NULL; entity = st->first_idle)
                __bfq_deactivate_entity(entity, 0);
}

/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move.
 */
static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
                                            struct bfq_entity *entity)
{
        struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

        BUG_ON(bfqq == NULL);
        bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
}

/**
 * bfq_reparent_active_entities - move to the root group all active
 *                                entities.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree with the entities.
 *
 * Needs the queue lock to be held and the reference to @bfqg to be
 * valid across the call.
 */
static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
                                                struct bfq_group *bfqg,
                                                struct bfq_service_tree *st)
{
        struct rb_root *active = &st->active;
        struct bfq_entity *entity = NULL;

        if (!RB_EMPTY_ROOT(&st->active))
                entity = bfq_entity_of(rb_first(active));

        for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
                bfq_reparent_leaf_entity(bfqd, entity);

        if (bfqg->sched_data.in_service_entity != NULL)
                bfq_reparent_leaf_entity(bfqd,
                                         bfqg->sched_data.in_service_entity);
}

/**
 * bfq_destroy_group - destroy @bfqg.
 * @bgrp: the bfqio_cgroup containing @bfqg.
 * @bfqg: the group being destroyed.
 *
 * Destroy @bfqg, making sure that it is not referenced from its parent.
 */
static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
{
        struct bfq_data *bfqd;
        struct bfq_service_tree *st;
        struct bfq_entity *entity = bfqg->my_entity;
        unsigned long uninitialized_var(flags);
        int i;

        hlist_del(&bfqg->group_node);

        /*
         * Empty all service_trees belonging to this group before
         * deactivating the group itself.
         */
        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
                st = bfqg->sched_data.service_tree + i;

                /*
                 * The idle tree may still contain bfq_queues belonging
                 * to exited tasks because they never migrated to a
                 * different cgroup from the one being destroyed now.
                 * No one else can access them, so it's safe to act
                 * without any lock.
                 */
                bfq_flush_idle_tree(st);

                /*
                 * It may happen that some queues are still active
                 * (busy) upon group destruction (if the corresponding
                 * processes have been forced to terminate). We move
                 * all the leaf entities corresponding to these queues
                 * to the root_group.
                 * Also, it may happen that the group has an entity
                 * in service, which is disconnected from the active
                 * tree: it must be moved, too.
                 * There is no need to put the sync queues, as the
                 * scheduler has taken no reference.
                 */
                bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
                if (bfqd != NULL) {
                        bfq_reparent_active_entities(bfqd, bfqg, st);
                        bfq_put_bfqd_unlock(bfqd, &flags);
                }
                BUG_ON(!RB_EMPTY_ROOT(&st->active));
                BUG_ON(!RB_EMPTY_ROOT(&st->idle));
        }
        BUG_ON(bfqg->sched_data.next_in_service != NULL);
        BUG_ON(bfqg->sched_data.in_service_entity != NULL);

        /*
         * We may race with device destruction, so take extra care when
         * dereferencing bfqg->bfqd.
         */
        bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
        if (bfqd != NULL) {
                hlist_del(&bfqg->bfqd_node);
                __bfq_deactivate_entity(entity, 0);
                bfq_put_async_queues(bfqd, bfqg);
                bfq_put_bfqd_unlock(bfqd, &flags);
        }
        BUG_ON(entity->tree != NULL);

        /*
         * No need to defer the kfree() to the end of the RCU grace
         * period: we are called from the destroy() callback of our
         * cgroup, so we can be sure that no one is a) still using
         * this cgroup or b) doing lookups in it.
         */
        kfree(bfqg);
}

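/*
 * End weight-raising for the async queues of all the groups attached to
 * @bfqd, including the root group.
 */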
static void bfq_end_wr_async(struct bfq_data *bfqd)
{
        struct hlist_node *pos, *n;
        struct bfq_group *bfqg;

        hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node)
                bfq_end_wr_async_queues(bfqd, bfqg);
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

/**
 * bfq_disconnect_groups - disconnect @bfqd from all its groups.
 * @bfqd: the device descriptor being exited.
 *
 * When the device exits we just make sure that no lookup can return
 * the now unused group structures. They will be deallocated on cgroup
 * destruction.
 */
static void bfq_disconnect_groups(struct bfq_data *bfqd)
{
        struct hlist_node *pos, *n;
        struct bfq_group *bfqg;

        bfq_log(bfqd, "disconnect_groups beginning");
        hlist_for_each_entry_safe(bfqg, pos, n, &bfqd->group_list, bfqd_node) {
                hlist_del(&bfqg->bfqd_node);

                __bfq_deactivate_entity(bfqg->my_entity, 0);

                /*
                 * Don't remove from the group hash, just set an
                 * invalid key. No lookups can race with the
                 * assignment as bfqd is being destroyed; this
                 * also implies that new elements cannot be added
                 * to the list.
                 */
                rcu_assign_pointer(bfqg->bfqd, NULL);

                bfq_log(bfqd, "disconnect_groups: put async for group %p",
                        bfqg);
                bfq_put_async_queues(bfqd, bfqg);
        }
}

static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
        struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
        struct bfq_group *bfqg = bfqd->root_group;

        bfq_put_async_queues(bfqd, bfqg);

        spin_lock_irq(&bgrp->lock);
        hlist_del_rcu(&bfqg->group_node);
        spin_unlock_irq(&bgrp->lock);

        /*
         * No need to synchronize_rcu() here: since the device is gone
         * there cannot be any read-side access to its root_group.
         */
        kfree(bfqg);
}

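/*
 * Allocate the root group of @bfqd, initialize its service trees and
 * link it into the group list of the statically allocated root cgroup.
 */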
static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
        struct bfq_group *bfqg;
        struct bfqio_cgroup *bgrp;
        int i;

        bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
        if (bfqg == NULL)
                return NULL;

        bfqg->entity.parent = NULL;
        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
                bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

        bgrp = &bfqio_root_cgroup;
        spin_lock_irq(&bgrp->lock);
        rcu_assign_pointer(bfqg->bfqd, bfqd);
        hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
        spin_unlock_irq(&bgrp->lock);

        return bfqg;
}

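/*
 * Generate the cgroupfs read handler for the given per-cgroup
 * attribute: the current value is read under bgrp->lock.
 */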
#define SHOW_FUNCTION(__VAR) \
static u64 bfqio_cgroup_##__VAR##_read(struct cgroup *cgroup, \
                                       struct cftype *cftype) \
{ \
        struct bfqio_cgroup *bgrp; \
        u64 ret; \
 \
        if (!cgroup_lock_live_group(cgroup)) \
                return -ENODEV; \
 \
        bgrp = cgroup_to_bfqio(cgroup); \
        spin_lock_irq(&bgrp->lock); \
        ret = bgrp->__VAR; \
        spin_unlock_irq(&bgrp->lock); \
 \
        cgroup_unlock(); \
 \
        return ret; \
}

SHOW_FUNCTION(weight);
SHOW_FUNCTION(ioprio);
SHOW_FUNCTION(ioprio_class);
#undef SHOW_FUNCTION

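/*
 * Generate the cgroupfs write handler for the given per-cgroup
 * attribute: validate the new value, store it in the bfqio_cgroup and
 * propagate it to the entities of all the groups attached to the
 * cgroup.
 */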
#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
static int bfqio_cgroup_##__VAR##_write(struct cgroup *cgroup, \
                                        struct cftype *cftype, \
                                        u64 val) \
{ \
        struct bfqio_cgroup *bgrp; \
        struct bfq_group *bfqg; \
        struct hlist_node *n; \
 \
        if (val < (__MIN) || val > (__MAX)) \
                return -EINVAL; \
 \
        if (!cgroup_lock_live_group(cgroup)) \
                return -ENODEV; \
 \
        bgrp = cgroup_to_bfqio(cgroup); \
 \
        spin_lock_irq(&bgrp->lock); \
        bgrp->__VAR = (unsigned short)val; \
        hlist_for_each_entry(bfqg, n, &bgrp->group_data, group_node) { \
                /* \
                 * Setting the ioprio_changed flag of the entity \
                 * to 1 with new_##__VAR == ##__VAR would re-set \
                 * the value of the weight to its ioprio mapping. \
                 * Set the flag only if necessary. \
                 */ \
                if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
                        bfqg->entity.new_##__VAR = (unsigned short)val; \
                        /* \
                         * Make sure that the above new value has been \
                         * stored in bfqg->entity.new_##__VAR before \
                         * setting the ioprio_changed flag. In fact, \
                         * this flag may be read asynchronously (in \
                         * critical sections protected by a different \
                         * lock than that held here), and finding this \
                         * flag set may cause the execution of the code \
                         * for updating parameters whose value may \
                         * depend also on bfqg->entity.new_##__VAR (in \
                         * __bfq_entity_update_weight_prio). \
                         * This barrier makes sure that the new value \
                         * of bfqg->entity.new_##__VAR is correctly \
                         * seen in that code. \
                         */ \
                        smp_wmb(); \
                        bfqg->entity.ioprio_changed = 1; \
                } \
        } \
        spin_unlock_irq(&bgrp->lock); \
 \
        cgroup_unlock(); \
 \
        return 0; \
}

STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
#undef STORE_FUNCTION

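/*
 * Control files exported to cgroupfs. Following the usual cgroup
 * convention they should show up prefixed with the subsystem name;
 * e.g., with a hypothetical mount point and group name:
 *
 *   mount -t cgroup -o bfqio none /sys/fs/cgroup/bfqio
 *   mkdir /sys/fs/cgroup/bfqio/grp
 *   echo 500 > /sys/fs/cgroup/bfqio/grp/bfqio.weight
 */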
static struct cftype bfqio_files[] = {
        {
                .name = "weight",
                .read_u64 = bfqio_cgroup_weight_read,
                .write_u64 = bfqio_cgroup_weight_write,
        },
        {
                .name = "ioprio",
                .read_u64 = bfqio_cgroup_ioprio_read,
                .write_u64 = bfqio_cgroup_ioprio_write,
        },
        {
                .name = "ioprio_class",
                .read_u64 = bfqio_cgroup_ioprio_class_read,
                .write_u64 = bfqio_cgroup_ioprio_class_write,
        },
};

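/* Register the bfqio control files with @cgroup. */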
static int bfqio_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
        return cgroup_add_files(cgroup, subsys, bfqio_files,
                                ARRAY_SIZE(bfqio_files));
}

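/*
 * Allocate and initialize the subsystem state for @cgroup; the root
 * cgroup uses the statically allocated bfqio_root_cgroup.
 */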
static struct cgroup_subsys_state *bfqio_create(struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp;

        if (cgroup->parent != NULL) {
                bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
                if (bgrp == NULL)
                        return ERR_PTR(-ENOMEM);
        } else
                bgrp = &bfqio_root_cgroup;

        spin_lock_init(&bgrp->lock);
        INIT_HLIST_HEAD(&bgrp->group_data);
        bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
        bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;

        return &bgrp->css;
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main bic/bfqq data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc; the drawback of this
 * behavior is that a group containing a task that forked using CLONE_IO
 * will not be destroyed until the tasks sharing the ioc die.
 */
static int bfqio_can_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        int ret = 0;

        cgroup_taskset_for_each(task, cgroup, tset) {
                /*
                 * task_lock() is needed to avoid races with
                 * exit_io_context().
                 */
                task_lock(task);
                ioc = task->io_context;
                if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
                        /*
                         * ioc == NULL means that the task is either too
                         * young or exiting: if it still has no ioc, the
                         * ioc can't be shared; if the task is exiting,
                         * the attach will fail anyway, no matter what we
                         * return here.
                         */
                        ret = -EINVAL;
                task_unlock(task);
                if (ret)
                        break;
        }

        return ret;
}

static void bfqio_attach(struct cgroup *cgroup, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        struct io_cq *icq;
        struct hlist_node *n;

        /*
         * IMPORTANT NOTE: The move of more than one process at a time to a
         * new group has not yet been tested.
         */
        cgroup_taskset_for_each(task, cgroup, tset) {
                ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
                if (ioc) {
                        /*
                         * Handle the cgroup change here.
                         */
                        rcu_read_lock();
                        hlist_for_each_entry_rcu(icq, n, &ioc->icq_list,
                                                 ioc_node)
                                if (!strncmp(
                                        icq->q->elevator->type->elevator_name,
                                        "bfq", ELV_NAME_MAX))
                                        bfq_bic_change_cgroup(icq_to_bic(icq),
                                                              cgroup);
                        rcu_read_unlock();
                        put_io_context(ioc);
                }
        }
}

static void bfqio_destroy(struct cgroup *cgroup)
{
        struct bfqio_cgroup *bgrp = cgroup_to_bfqio(cgroup);
        struct hlist_node *n, *tmp;
        struct bfq_group *bfqg;

        /*
         * Since we are destroying the cgroup, there are no more tasks
         * referencing it, and all the RCU grace periods that may have
         * referenced it have ended (as the destruction of the parent
         * cgroup is RCU-safe); bgrp->group_data will not be accessed by
         * anything else and we don't need any synchronization.
         */
        hlist_for_each_entry_safe(bfqg, n, tmp, &bgrp->group_data, group_node)
                bfq_destroy_group(bgrp, bfqg);

        BUG_ON(!hlist_empty(&bgrp->group_data));

        kfree(bgrp);
}

struct cgroup_subsys bfqio_subsys = {
        .name = "bfqio",
        .create = bfqio_create,
        .can_attach = bfqio_can_attach,
        .attach = bfqio_attach,
        .destroy = bfqio_destroy,
        .populate = bfqio_populate,
        .subsys_id = bfqio_subsys_id,
};
#else
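/*
 * Stub implementations used when CONFIG_CGROUP_BFQIO is not set: every
 * queue belongs to a single root group, so moving queues between groups
 * and (dis)connecting groups reduce to (almost) no-ops.
 */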
static inline void bfq_init_entity(struct bfq_entity *entity,
                                   struct bfq_group *bfqg)
{
        entity->weight = entity->new_weight;
        entity->orig_weight = entity->new_weight;
        entity->ioprio = entity->new_ioprio;
        entity->ioprio_class = entity->new_ioprio_class;
        entity->sched_data = &bfqg->sched_data;
}

static inline struct bfq_group *
bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
        struct bfq_data *bfqd = bic_to_bfqd(bic);
        return bfqd->root_group;
}

static inline void bfq_bfqq_move(struct bfq_data *bfqd,
                                 struct bfq_queue *bfqq,
                                 struct bfq_entity *entity,
                                 struct bfq_group *bfqg)
{
}

static void bfq_end_wr_async(struct bfq_data *bfqd)
{
        bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
{
        bfq_put_async_queues(bfqd, bfqd->root_group);
}

static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
        kfree(bfqd->root_group);
}

static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
        struct bfq_group *bfqg;
        int i;

        bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
        if (bfqg == NULL)
                return NULL;

        for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
                bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

        return bfqg;
}
#endif